307 files changed, 77999 insertions, 3851 deletions
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 61ae899cfc17..2f9d378edec3 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -1,32 +1,56 @@
 # SPDX-License-Identifier: GPL-2.0-only
+bind_bhash
+bind_timewait
+bind_wildcard
+csum
+cmsg_sender
+diag_uid
+fin_ack_lat
+gro
+hwtstamp_config
+io_uring_zerocopy_tx
+ioam6_parser
+ip_defrag
+ip_local_port_range
 ipsec
+ipv6_flowlabel
+ipv6_flowlabel_mgr
+log.txt
 msg_zerocopy
-socket
+nettest
 psock_fanout
 psock_snd
 psock_tpacket
+reuseaddr_conflict
+reuseaddr_ports_exhausted
 reuseport_addr_any
 reuseport_bpf
 reuseport_bpf_cpu
 reuseport_bpf_numa
 reuseport_dualstack
-reuseaddr_conflict
-tcp_mmap
-udpgso
-udpgso_bench_rx
-udpgso_bench_tx
-tcp_inq
-tls
-txring_overwrite
-ip_defrag
-ipv6_flowlabel
-ipv6_flowlabel_mgr
+rxtimestamp
+sctp_hello
+scm_pidfd
+sk_bind_sendto_listen
+sk_connect_zero_addr
+socket
+so_incoming_cpu
+so_netns_cookie
 so_txtime
+stress_reuseport_listen
+tap
 tcp_fastopen_backup_key
-nettest
-fin_ack_lat
-reuseaddr_ports_exhausted
-hwtstamp_config
-rxtimestamp
+tcp_inq
+tcp_mmap
+test_unix_oob
 timestamping
+tls
+toeplitz
+tools
+tun
+txring_overwrite
 txtimestamp
+udpgso
+udpgso_bench_rx
+udpgso_bench_tx
+unix_connect
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index ef352477cac6..7b6918d5f4af 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -2,7 +2,9 @@
 # Makefile for net selftests
 
 CFLAGS =  -Wall -Wl,--no-as-needed -O2 -g
-CFLAGS += -I../../../../usr/include/
+CFLAGS += -I../../../../usr/include/ $(KHDR_INCLUDES)
+# Additional include paths needed by kselftest.h
+CFLAGS += -I../
 
 TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh \
 	      rtnetlink.sh xfrm_policy.sh test_blackhole_dev.sh
@@ -11,8 +13,8 @@ TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh
 TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh
 TEST_PROGS += test_vxlan_fdb_changelink.sh so_txtime.sh ipv6_flowlabel.sh
 TEST_PROGS += tcp_fastopen_backup_key.sh fcnal-test.sh l2tp.sh traceroute.sh
-TEST_PROGS += fin_ack_lat.sh fib_nexthop_multiprefix.sh fib_nexthops.sh
-TEST_PROGS += altnames.sh icmp_redirect.sh ip6_gre_headroom.sh
+TEST_PROGS += fin_ack_lat.sh fib_nexthop_multiprefix.sh fib_nexthops.sh fib_nexthop_nongw.sh
+TEST_PROGS += altnames.sh icmp.sh icmp_redirect.sh ip6_gre_headroom.sh
 TEST_PROGS += route_localnet.sh
 TEST_PROGS += reuseaddr_ports_exhausted.sh
 TEST_PROGS += txtimestamp.sh
@@ -21,23 +23,133 @@ TEST_PROGS += rxtimestamp.sh
 TEST_PROGS += devlink_port_split.py
 TEST_PROGS += drop_monitor_tests.sh
 TEST_PROGS += vrf_route_leaking.sh
-TEST_PROGS_EXTENDED := in_netns.sh
+TEST_PROGS += bareudp.sh
+TEST_PROGS += amt.sh
+TEST_PROGS += unicast_extensions.sh
+TEST_PROGS += udpgro_fwd.sh
+TEST_PROGS += udpgro_frglist.sh
+TEST_PROGS += veth.sh
+TEST_PROGS += ioam6.sh
+TEST_PROGS += gro.sh
+TEST_PROGS += gre_gso.sh
+TEST_PROGS += cmsg_so_mark.sh
+TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh
+TEST_PROGS += netns-name.sh
+TEST_PROGS += srv6_end_dt46_l3vpn_test.sh
+TEST_PROGS += srv6_end_dt4_l3vpn_test.sh
+TEST_PROGS += srv6_end_dt6_l3vpn_test.sh
+TEST_PROGS += srv6_hencap_red_l3vpn_test.sh
+TEST_PROGS += srv6_hl2encap_red_l2vpn_test.sh
+TEST_PROGS += srv6_end_next_csid_l3vpn_test.sh
+TEST_PROGS += srv6_end_x_next_csid_l3vpn_test.sh
+TEST_PROGS += srv6_end_flavors_test.sh
+TEST_PROGS += vrf_strict_mode_test.sh
+TEST_PROGS += arp_ndisc_evict_nocarrier.sh
+TEST_PROGS += ndisc_unsolicited_na_test.sh
+TEST_PROGS += arp_ndisc_untracked_subnets.sh
+TEST_PROGS += stress_reuseport_listen.sh
+TEST_PROGS += l2_tos_ttl_inherit.sh
+TEST_PROGS += bind_bhash.sh
+TEST_PROGS += ip_local_port_range.sh
+TEST_PROGS += rps_default_mask.sh
+TEST_PROGS += big_tcp.sh
+TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh
 TEST_GEN_FILES =  socket nettest
 TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
 TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite
 TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag
-TEST_GEN_FILES += so_txtime ipv6_flowlabel ipv6_flowlabel_mgr
+TEST_GEN_FILES += so_txtime ipv6_flowlabel ipv6_flowlabel_mgr so_netns_cookie
 TEST_GEN_FILES += tcp_fastopen_backup_key
 TEST_GEN_FILES += fin_ack_lat
 TEST_GEN_FILES += reuseaddr_ports_exhausted
 TEST_GEN_FILES += hwtstamp_config rxtimestamp timestamping txtimestamp
 TEST_GEN_FILES += ipsec
+TEST_GEN_FILES += ioam6_parser
+TEST_GEN_FILES += gro
 TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
-TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
+TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls tun tap
+TEST_GEN_FILES += toeplitz
+TEST_GEN_FILES += cmsg_sender
+TEST_GEN_FILES += stress_reuseport_listen
+TEST_PROGS += test_vxlan_vnifiltering.sh
+TEST_GEN_FILES += io_uring_zerocopy_tx
+TEST_PROGS += io_uring_zerocopy_tx.sh
+TEST_GEN_FILES += bind_bhash
+TEST_GEN_PROGS += sk_bind_sendto_listen
+TEST_GEN_PROGS += sk_connect_zero_addr
+TEST_PROGS += test_ingress_egress_chaining.sh
+TEST_GEN_PROGS += so_incoming_cpu
+TEST_PROGS += sctp_vrf.sh
+TEST_GEN_FILES += sctp_hello
+TEST_GEN_FILES += csum
+TEST_GEN_FILES += nat6to4.o
+TEST_GEN_FILES += xdp_dummy.o
+TEST_GEN_FILES += ip_local_port_range
+TEST_GEN_FILES += bind_wildcard
+TEST_PROGS += test_vxlan_mdb.sh
+TEST_PROGS += test_bridge_neigh_suppress.sh
+TEST_PROGS += test_vxlan_nolocalbypass.sh
+TEST_PROGS += test_bridge_backup_port.sh
+TEST_PROGS += fdb_flush.sh
+TEST_PROGS += fq_band_pktlimit.sh
+TEST_PROGS += vlan_hw_filter.sh
+
+TEST_FILES := settings
+TEST_FILES += in_netns.sh lib.sh net_helper.sh setup_loopback.sh setup_veth.sh
+
+TEST_INCLUDES := forwarding/lib.sh
 
-KSFT_KHDR_INSTALL := 1
 include ../lib.mk
 
 $(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma
-$(OUTPUT)/tcp_mmap: LDLIBS += -lpthread
+$(OUTPUT)/tcp_mmap: LDLIBS += -lpthread -lcrypto
 $(OUTPUT)/tcp_inq: LDLIBS += -lpthread
+$(OUTPUT)/bind_bhash: LDLIBS += -lpthread
+$(OUTPUT)/io_uring_zerocopy_tx: CFLAGS += -I../../../include/
+
+# Rules to generate bpf objs
+CLANG ?= clang
+SCRATCH_DIR := $(OUTPUT)/tools
+BUILD_DIR := $(SCRATCH_DIR)/build
+BPFDIR := $(abspath ../../../lib/bpf)
+APIDIR := $(abspath ../../../include/uapi)
+
+CCINCLUDE += -I../bpf
+CCINCLUDE += -I../../../../usr/include/
+CCINCLUDE += -I$(SCRATCH_DIR)/include
+
+BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a
+
+MAKE_DIRS := $(BUILD_DIR)/libbpf
+$(MAKE_DIRS):
+	mkdir -p $@
+
+# Get Clang's default includes on this system, as opposed to those seen by
+# '--target=bpf'. This fixes "missing" files on some architectures/distros,
+# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
+#
+# Use '-idirafter': Don't interfere with include mechanics except where the
+# build would have failed anyways.
+define get_sys_includes
+$(shell $(1) $(2) -v -E - </dev/null 2>&1 \
+	| sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \
+$(shell $(1) $(2) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}')
+endef
+
+ifneq ($(CROSS_COMPILE),)
+CLANG_TARGET_ARCH = --target=$(notdir $(CROSS_COMPILE:%-=%))
+endif
+
+CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH))
+
+$(OUTPUT)/nat6to4.o $(OUTPUT)/xdp_dummy.o: $(OUTPUT)/%.o : %.c $(BPFOBJ) | $(MAKE_DIRS)
+	$(CLANG) -O2 --target=bpf -c $< $(CCINCLUDE) $(CLANG_SYS_INCLUDES) -o $@
+
+$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile)		       \
+	   $(APIDIR)/linux/bpf.h					       \
+	   | $(BUILD_DIR)/libbpf
+	$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/     \
+		    EXTRA_CFLAGS='-g -O0'				       \
+		    DESTDIR=$(SCRATCH_DIR) prefix= all install_headers
+
+EXTRA_CLEAN := $(SCRATCH_DIR)
diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile
new file mode 100644
index 000000000000..221c387a7d7f
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/Makefile
@@ -0,0 +1,4 @@
+CFLAGS += $(KHDR_INCLUDES)
+TEST_GEN_PROGS := diag_uid test_unix_oob unix_connect scm_pidfd
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/net/af_unix/diag_uid.c b/tools/testing/selftests/net/af_unix/diag_uid.c
new file mode 100644
index 000000000000..79a3dd75590e
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/diag_uid.c
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+
+#define _GNU_SOURCE
+#include <sched.h>
+
+#include <unistd.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/sock_diag.h>
+#include <linux/unix_diag.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include "../../kselftest_harness.h"
+
+FIXTURE(diag_uid)
+{
+	int netlink_fd;
+	int unix_fd;
+	__u32 inode;
+	__u64 cookie;
+};
+
+FIXTURE_VARIANT(diag_uid)
+{
+	int unshare;
+	int udiag_show;
+};
+
+FIXTURE_VARIANT_ADD(diag_uid, uid)
+{
+	.unshare = 0,
+	.udiag_show = UDIAG_SHOW_UID
+};
+
+FIXTURE_VARIANT_ADD(diag_uid, uid_unshare)
+{
+	.unshare = CLONE_NEWUSER,
+	.udiag_show = UDIAG_SHOW_UID
+};
+
+FIXTURE_SETUP(diag_uid)
+{
+	struct stat file_stat;
+	socklen_t optlen;
+	int ret;
+
+	if (variant->unshare)
+		ASSERT_EQ(unshare(variant->unshare), 0);
+
+	self->netlink_fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
+	ASSERT_NE(self->netlink_fd, -1);
+
+	self->unix_fd = socket(AF_UNIX, SOCK_STREAM, 0);
+	ASSERT_NE(self->unix_fd, -1);
+
+	ret = fstat(self->unix_fd, &file_stat);
+	ASSERT_EQ(ret, 0);
+
+	self->inode = file_stat.st_ino;
+
+	optlen = sizeof(self->cookie);
+	ret = getsockopt(self->unix_fd, SOL_SOCKET, SO_COOKIE, &self->cookie, &optlen);
+	ASSERT_EQ(ret, 0);
+}
+
+FIXTURE_TEARDOWN(diag_uid)
+{
+	close(self->netlink_fd);
+	close(self->unix_fd);
+}
+
+int send_request(struct __test_metadata *_metadata,
+		 FIXTURE_DATA(diag_uid) *self,
+		 const FIXTURE_VARIANT(diag_uid) *variant)
+{
+	struct {
+		struct nlmsghdr nlh;
+		struct unix_diag_req udr;
+	} req = {
+		.nlh = {
+			.nlmsg_len = sizeof(req),
+			.nlmsg_type = SOCK_DIAG_BY_FAMILY,
+			.nlmsg_flags = NLM_F_REQUEST
+		},
+		.udr = {
+			.sdiag_family = AF_UNIX,
+			.udiag_ino = self->inode,
+			.udiag_cookie = {
+				(__u32)self->cookie,
+				(__u32)(self->cookie >> 32)
+			},
+			.udiag_show = variant->udiag_show
+		}
+	};
+	struct sockaddr_nl nladdr = {
+		.nl_family = AF_NETLINK
+	};
+	struct iovec iov = {
+		.iov_base = &req,
+		.iov_len = sizeof(req)
+	};
+	struct msghdr msg = {
+		.msg_name = &nladdr,
+		.msg_namelen = sizeof(nladdr),
+		.msg_iov = &iov,
+		.msg_iovlen = 1
+	};
+
+	return sendmsg(self->netlink_fd, &msg, 0);
+}
+
+void render_response(struct __test_metadata *_metadata,
+		     struct unix_diag_req *udr, __u32 len)
+{
+	unsigned int rta_len = len - NLMSG_LENGTH(sizeof(*udr));
+	struct rtattr *attr;
+	uid_t uid;
+
+	ASSERT_GT(len, sizeof(*udr));
+	ASSERT_EQ(udr->sdiag_family, AF_UNIX);
+
+	attr = (struct rtattr *)(udr + 1);
+	ASSERT_NE(RTA_OK(attr, rta_len), 0);
+	ASSERT_EQ(attr->rta_type, UNIX_DIAG_UID);
+
+	uid = *(uid_t *)RTA_DATA(attr);
+	ASSERT_EQ(uid, getuid());
+}
+
+void receive_response(struct __test_metadata *_metadata,
+		      FIXTURE_DATA(diag_uid) *self)
+{
+	long buf[8192 / sizeof(long)];
+	struct sockaddr_nl nladdr = {
+		.nl_family = AF_NETLINK
+	};
+	struct iovec iov = {
+		.iov_base = buf,
+		.iov_len = sizeof(buf)
+	};
+	struct msghdr msg = {
+		.msg_name = &nladdr,
+		.msg_namelen = sizeof(nladdr),
+		.msg_iov = &iov,
+		.msg_iovlen = 1
+	};
+	struct nlmsghdr *nlh;
+	int ret;
+
+	ret = recvmsg(self->netlink_fd, &msg, 0);
+	ASSERT_GT(ret, 0);
+
+	nlh = (struct nlmsghdr *)buf;
+	ASSERT_NE(NLMSG_OK(nlh, ret), 0);
+	ASSERT_EQ(nlh->nlmsg_type, SOCK_DIAG_BY_FAMILY);
+
+	render_response(_metadata, NLMSG_DATA(nlh), nlh->nlmsg_len);
+
+	nlh = NLMSG_NEXT(nlh, ret);
+	ASSERT_EQ(NLMSG_OK(nlh, ret), 0);
+}
+
+TEST_F(diag_uid, 1)
+{
+	int ret;
+
+	ret = send_request(_metadata, self, variant);
+	ASSERT_GT(ret, 0);
+
+	receive_response(_metadata, self);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/af_unix/scm_pidfd.c b/tools/testing/selftests/net/af_unix/scm_pidfd.c
new file mode 100644
index 000000000000..7e534594167e
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/scm_pidfd.c
@@ -0,0 +1,429 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+#define _GNU_SOURCE
+#include <error.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <linux/socket.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/un.h>
+#include <sys/signal.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include "../../kselftest_harness.h"
+
+#define clean_errno() (errno == 0 ? "None" : strerror(errno))
+#define log_err(MSG, ...)                                                   \
+	fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", __FILE__, __LINE__, \
+		clean_errno(), ##__VA_ARGS__)
+
+#ifndef SCM_PIDFD
+#define SCM_PIDFD 0x04
+#endif
+
+static void child_die()
+{
+	exit(1);
+}
+
+static int safe_int(const char *numstr, int *converted)
+{
+	char *err = NULL;
+	long sli;
+
+	errno = 0;
+	sli = strtol(numstr, &err, 0);
+	if (errno == ERANGE && (sli == LONG_MAX || sli == LONG_MIN))
+		return -ERANGE;
+
+	if (errno != 0 && sli == 0)
+		return -EINVAL;
+
+	if (err == numstr || *err != '\0')
+		return -EINVAL;
+
+	if (sli > INT_MAX || sli < INT_MIN)
+		return -ERANGE;
+
+	*converted = (int)sli;
+	return 0;
+}
+
+static int char_left_gc(const char *buffer, size_t len)
+{
+	size_t i;
+
+	for (i = 0; i < len; i++) {
+		if (buffer[i] == ' ' || buffer[i] == '\t')
+			continue;
+
+		return i;
+	}
+
+	return 0;
+}
+
+static int char_right_gc(const char *buffer, size_t len)
+{
+	int i;
+
+	for (i = len - 1; i >= 0; i--) {
+		if (buffer[i] == ' ' || buffer[i] == '\t' ||
+		    buffer[i] == '\n' || buffer[i] == '\0')
+			continue;
+
+		return i + 1;
+	}
+
+	return 0;
+}
+
+static char *trim_whitespace_in_place(char *buffer)
+{
+	buffer += char_left_gc(buffer, strlen(buffer));
+	buffer[char_right_gc(buffer, strlen(buffer))] = '\0';
+	return buffer;
+}
+
+/* borrowed (with all helpers) from pidfd/pidfd_open_test.c */
+static pid_t get_pid_from_fdinfo_file(int pidfd, const char *key, size_t keylen)
+{
+	int ret;
+	char path[512];
+	FILE *f;
+	size_t n = 0;
+	pid_t result = -1;
+	char *line = NULL;
+
+	snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", pidfd);
+
+	f = fopen(path, "re");
+	if (!f)
+		return -1;
+
+	while (getline(&line, &n, f) != -1) {
+		char *numstr;
+
+		if (strncmp(line, key, keylen))
+			continue;
+
+		numstr = trim_whitespace_in_place(line + 4);
+		ret = safe_int(numstr, &result);
+		if (ret < 0)
+			goto out;
+
+		break;
+	}
+
+out:
+	free(line);
+	fclose(f);
+	return result;
+}
+
+static int cmsg_check(int fd)
+{
+	struct msghdr msg = { 0 };
+	struct cmsghdr *cmsg;
+	struct iovec iov;
+	struct ucred *ucred = NULL;
+	int data = 0;
+	char control[CMSG_SPACE(sizeof(struct ucred)) +
+		     CMSG_SPACE(sizeof(int))] = { 0 };
+	int *pidfd = NULL;
+	pid_t parent_pid;
+	int err;
+
+	iov.iov_base = &data;
+	iov.iov_len = sizeof(data);
+
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+	msg.msg_control = control;
+	msg.msg_controllen = sizeof(control);
+
+	err = recvmsg(fd, &msg, 0);
+	if (err < 0) {
+		log_err("recvmsg");
+		return 1;
+	}
+
+	if (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
+		log_err("recvmsg: truncated");
+		return 1;
+	}
+
+	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
+	     cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+		if (cmsg->cmsg_level == SOL_SOCKET &&
+		    cmsg->cmsg_type == SCM_PIDFD) {
+			if (cmsg->cmsg_len < sizeof(*pidfd)) {
+				log_err("CMSG parse: SCM_PIDFD wrong len");
+				return 1;
+			}
+
+			pidfd = (void *)CMSG_DATA(cmsg);
+		}
+
+		if (cmsg->cmsg_level == SOL_SOCKET &&
+		    cmsg->cmsg_type == SCM_CREDENTIALS) {
+			if (cmsg->cmsg_len < sizeof(*ucred)) {
+				log_err("CMSG parse: SCM_CREDENTIALS wrong len");
+				return 1;
+			}
+
+			ucred = (void *)CMSG_DATA(cmsg);
+		}
+	}
+
+	/* send(pfd, "x", sizeof(char), 0) */
+	if (data != 'x') {
+		log_err("recvmsg: data corruption");
+		return 1;
+	}
+
+	if (!pidfd) {
+		log_err("CMSG parse: SCM_PIDFD not found");
+		return 1;
+	}
+
+	if (!ucred) {
+		log_err("CMSG parse: SCM_CREDENTIALS not found");
+		return 1;
+	}
+
+	/* pidfd from SCM_PIDFD should point to the parent process PID */
+	parent_pid =
+		get_pid_from_fdinfo_file(*pidfd, "Pid:", sizeof("Pid:") - 1);
+	if (parent_pid != getppid()) {
+		log_err("wrong SCM_PIDFD %d != %d", parent_pid, getppid());
+		return 1;
+	}
+
+	return 0;
+}
+
+struct sock_addr {
+	char sock_name[32];
+	struct sockaddr_un listen_addr;
+	socklen_t addrlen;
+};
+
+FIXTURE(scm_pidfd)
+{
+	int server;
+	pid_t client_pid;
+	int startup_pipe[2];
+	struct sock_addr server_addr;
+	struct sock_addr *client_addr;
+};
+
+FIXTURE_VARIANT(scm_pidfd)
+{
+	int type;
+	bool abstract;
+};
+
+FIXTURE_VARIANT_ADD(scm_pidfd, stream_pathname)
+{
+	.type = SOCK_STREAM,
+	.abstract = 0,
+};
+
+FIXTURE_VARIANT_ADD(scm_pidfd, stream_abstract)
+{
+	.type = SOCK_STREAM,
+	.abstract = 1,
+};
+
+FIXTURE_VARIANT_ADD(scm_pidfd, dgram_pathname)
+{
+	.type = SOCK_DGRAM,
+	.abstract = 0,
+};
+
+FIXTURE_VARIANT_ADD(scm_pidfd, dgram_abstract)
+{
+	.type = SOCK_DGRAM,
+	.abstract = 1,
+};
+
+FIXTURE_SETUP(scm_pidfd)
+{
+	self->client_addr = mmap(NULL, sizeof(*self->client_addr), PROT_READ | PROT_WRITE,
+				 MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+	ASSERT_NE(MAP_FAILED, self->client_addr);
+}
+
+FIXTURE_TEARDOWN(scm_pidfd)
+{
+	close(self->server);
+
+	kill(self->client_pid, SIGKILL);
+	waitpid(self->client_pid, NULL, 0);
+
+	if (!variant->abstract) {
+		unlink(self->server_addr.sock_name);
+		unlink(self->client_addr->sock_name);
+	}
+}
+
+static void fill_sockaddr(struct sock_addr *addr, bool abstract)
+{
+	char *sun_path_buf = (char *)&addr->listen_addr.sun_path;
+
+	addr->listen_addr.sun_family = AF_UNIX;
+	addr->addrlen = offsetof(struct sockaddr_un, sun_path);
+	snprintf(addr->sock_name, sizeof(addr->sock_name), "scm_pidfd_%d", getpid());
+	addr->addrlen += strlen(addr->sock_name);
+	if (abstract) {
+		*sun_path_buf = '\0';
+		addr->addrlen++;
+		sun_path_buf++;
+	} else {
+		unlink(addr->sock_name);
+	}
+	memcpy(sun_path_buf, addr->sock_name, strlen(addr->sock_name));
+}
+
+static void client(FIXTURE_DATA(scm_pidfd) *self,
+		   const FIXTURE_VARIANT(scm_pidfd) *variant)
+{
+	int cfd;
+	socklen_t len;
+	struct ucred peer_cred;
+	int peer_pidfd;
+	pid_t peer_pid;
+	int on = 0;
+
+	cfd = socket(AF_UNIX, variant->type, 0);
+	if (cfd < 0) {
+		log_err("socket");
+		child_die();
+	}
+
+	if (variant->type == SOCK_DGRAM) {
+		fill_sockaddr(self->client_addr, variant->abstract);
+
+		if (bind(cfd, (struct sockaddr *)&self->client_addr->listen_addr, self->client_addr->addrlen)) {
+			log_err("bind");
+			child_die();
+		}
+	}
+
+	if (connect(cfd, (struct sockaddr *)&self->server_addr.listen_addr,
+		    self->server_addr.addrlen) != 0) {
+		log_err("connect");
+		child_die();
+	}
+
+	on = 1;
+	if (setsockopt(cfd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on))) {
+		log_err("Failed to set SO_PASSCRED");
+		child_die();
+	}
+
+	if (setsockopt(cfd, SOL_SOCKET, SO_PASSPIDFD, &on, sizeof(on))) {
+		log_err("Failed to set SO_PASSPIDFD");
+		child_die();
+	}
+
+	close(self->startup_pipe[1]);
+
+	if (cmsg_check(cfd)) {
+		log_err("cmsg_check failed");
+		child_die();
+	}
+
+	/* skip further for SOCK_DGRAM as it's not applicable */
+	if (variant->type == SOCK_DGRAM)
+		return;
+
+	len = sizeof(peer_cred);
+	if (getsockopt(cfd, SOL_SOCKET, SO_PEERCRED, &peer_cred, &len)) {
+		log_err("Failed to get SO_PEERCRED");
+		child_die();
+	}
+
+	len = sizeof(peer_pidfd);
+	if (getsockopt(cfd, SOL_SOCKET, SO_PEERPIDFD, &peer_pidfd, &len)) {
+		log_err("Failed to get SO_PEERPIDFD");
+		child_die();
+	}
+
+	/* pid from SO_PEERCRED should point to the parent process PID */
+	if (peer_cred.pid != getppid()) {
+		log_err("peer_cred.pid != getppid(): %d != %d", peer_cred.pid, getppid());
+		child_die();
+	}
+
+	peer_pid = get_pid_from_fdinfo_file(peer_pidfd,
+					    "Pid:", sizeof("Pid:") - 1);
+	if (peer_pid != peer_cred.pid) {
+		log_err("peer_pid != peer_cred.pid: %d != %d", peer_pid, peer_cred.pid);
+		child_die();
+	}
+}
+
+TEST_F(scm_pidfd, test)
+{
+	int err;
+	int pfd;
+	int child_status = 0;
+
+	self->server = socket(AF_UNIX, variant->type, 0);
+	ASSERT_NE(-1, self->server);
+
+	fill_sockaddr(&self->server_addr, variant->abstract);
+
+	err = bind(self->server, (struct sockaddr *)&self->server_addr.listen_addr, self->server_addr.addrlen);
+	ASSERT_EQ(0, err);
+
+	if (variant->type == SOCK_STREAM) {
+		err = listen(self->server, 1);
+		ASSERT_EQ(0, err);
+	}
+
+	err = pipe(self->startup_pipe);
+	ASSERT_NE(-1, err);
+
+	self->client_pid = fork();
+	ASSERT_NE(-1, self->client_pid);
+	if (self->client_pid == 0) {
+		close(self->server);
+		close(self->startup_pipe[0]);
+		client(self, variant);
+		exit(0);
+	}
+	close(self->startup_pipe[1]);
+
+	if (variant->type == SOCK_STREAM) {
+		pfd = accept(self->server, NULL, NULL);
+		ASSERT_NE(-1, pfd);
+	} else {
+		pfd = self->server;
+	}
+
+	/* wait until the child arrives at checkpoint */
+	read(self->startup_pipe[0], &err, sizeof(int));
+	close(self->startup_pipe[0]);
+
+	if (variant->type == SOCK_DGRAM) {
+		err = sendto(pfd, "x", sizeof(char), 0, (struct sockaddr *)&self->client_addr->listen_addr, self->client_addr->addrlen);
+		ASSERT_NE(-1, err);
+	} else {
+		err = send(pfd, "x", sizeof(char), 0);
+		ASSERT_NE(-1, err);
+	}
+
+	close(pfd);
+	waitpid(self->client_pid, &child_status, 0);
+	ASSERT_EQ(0, WIFEXITED(child_status) ? WEXITSTATUS(child_status) : 1);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c
new file mode 100644
index 000000000000..a7c51889acd5
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/test_unix_oob.c
@@ -0,0 +1,436 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+#include <netinet/tcp.h>
+#include <sys/un.h>
+#include <sys/signal.h>
+#include <sys/poll.h>
+
+static int pipefd[2];
+static int signal_recvd;
+static pid_t producer_id;
+static char sock_name[32];
+
+static void sig_hand(int sn, siginfo_t *si, void *p)
+{
+	signal_recvd = sn;
+}
+
+static int set_sig_handler(int signal)
+{
+	struct sigaction sa;
+
+	sa.sa_sigaction = sig_hand;
+	sigemptyset(&sa.sa_mask);
+	sa.sa_flags = SA_SIGINFO | SA_RESTART;
+
+	return sigaction(signal, &sa, NULL);
+}
+
+static void set_filemode(int fd, int set)
+{
+	int flags = fcntl(fd, F_GETFL, 0);
+
+	if (set)
+		flags &= ~O_NONBLOCK;
+	else
+		flags |= O_NONBLOCK;
+	fcntl(fd, F_SETFL, flags);
+}
+
+static void signal_producer(int fd)
+{
+	char cmd;
+
+	cmd = 'S';
+	write(fd, &cmd, sizeof(cmd));
+}
+
+static void wait_for_signal(int fd)
+{
+	char buf[5];
+
+	read(fd, buf, 5);
+}
+
+static void die(int status)
+{
+	fflush(NULL);
+	unlink(sock_name);
+	kill(producer_id, SIGTERM);
+	exit(status);
+}
+
+int is_sioctatmark(int fd)
+{
+	int ans = -1;
+
+	if (ioctl(fd, SIOCATMARK, &ans, sizeof(ans)) < 0) {
+#ifdef DEBUG
+		perror("SIOCATMARK Failed");
+#endif
+	}
+	return ans;
+}
+
+void read_oob(int fd, char *c)
+{
+
+	*c = ' ';
+	if (recv(fd, c, sizeof(*c), MSG_OOB) < 0) {
+#ifdef DEBUG
+		perror("Reading MSG_OOB Failed");
+#endif
+	}
+}
+
+int read_data(int pfd, char *buf, int size)
+{
+	int len = 0;
+
+	memset(buf, size, '0');
+	len = read(pfd, buf, size);
+#ifdef DEBUG
+	if (len < 0)
+		perror("read failed");
+#endif
+	return len;
+}
+
+static void wait_for_data(int pfd, int event)
+{
+	struct pollfd pfds[1];
+
+	pfds[0].fd = pfd;
+	pfds[0].events = event;
+	poll(pfds, 1, -1);
+}
+
+void producer(struct sockaddr_un *consumer_addr)
+{
+	int cfd;
+	char buf[64];
+	int i;
+
+	memset(buf, 'x', sizeof(buf));
+	cfd = socket(AF_UNIX, SOCK_STREAM, 0);
+
+	wait_for_signal(pipefd[0]);
+	if (connect(cfd, (struct sockaddr *)consumer_addr,
+		     sizeof(*consumer_addr)) != 0) {
+		perror("Connect failed");
+		kill(0, SIGTERM);
+		exit(1);
+	}
+
+	for (i = 0; i < 2; i++) {
+		/* Test 1: Test for SIGURG and OOB */
+		wait_for_signal(pipefd[0]);
+		memset(buf, 'x', sizeof(buf));
+		buf[63] = '@';
+		send(cfd, buf, sizeof(buf), MSG_OOB);
+
+		wait_for_signal(pipefd[0]);
+
+		/* Test 2: Test for OOB being overwitten */
+		memset(buf, 'x', sizeof(buf));
+		buf[63] = '%';
+		send(cfd, buf, sizeof(buf), MSG_OOB);
+
+		memset(buf, 'x', sizeof(buf));
+		buf[63] = '#';
+		send(cfd, buf, sizeof(buf), MSG_OOB);
+
+		wait_for_signal(pipefd[0]);
+
+		/* Test 3: Test for SIOCATMARK */
+		memset(buf, 'x', sizeof(buf));
+		buf[63] = '@';
+		send(cfd, buf, sizeof(buf), MSG_OOB);
+
+		memset(buf, 'x', sizeof(buf));
+		buf[63] = '%';
+		send(cfd, buf, sizeof(buf), MSG_OOB);
+
+		memset(buf, 'x', sizeof(buf));
+		send(cfd, buf, sizeof(buf), 0);
+
+		wait_for_signal(pipefd[0]);
+
+		/* Test 4: Test for 1byte OOB msg */
+		memset(buf, 'x', sizeof(buf));
+		buf[0] = '@';
+		send(cfd, buf, 1, MSG_OOB);
+	}
+}
+
+int
+main(int argc, char **argv)
+{
+	int lfd, pfd;
+	struct sockaddr_un consumer_addr, paddr;
+	socklen_t len = sizeof(consumer_addr);
+	char buf[1024];
+	int on = 0;
+	char oob;
+	int atmark;
+
+	lfd = socket(AF_UNIX, SOCK_STREAM, 0);
+	memset(&consumer_addr, 0, sizeof(consumer_addr));
+	consumer_addr.sun_family = AF_UNIX;
+	sprintf(sock_name, "unix_oob_%d", getpid());
+	unlink(sock_name);
+	strcpy(consumer_addr.sun_path, sock_name);
+
+	if ((bind(lfd, (struct sockaddr *)&consumer_addr,
+		  sizeof(consumer_addr))) != 0) {
+		perror("socket bind failed");
+		exit(1);
+	}
+
+	pipe(pipefd);
+
+	listen(lfd, 1);
+
+	producer_id = fork();
+	if (producer_id == 0) {
+		producer(&consumer_addr);
+		exit(0);
+	}
+
+	set_sig_handler(SIGURG);
+	signal_producer(pipefd[1]);
+
+	pfd = accept(lfd, (struct sockaddr *) &paddr, &len);
+	fcntl(pfd, F_SETOWN, getpid());
+
+	signal_recvd = 0;
+	signal_producer(pipefd[1]);
+
+	/* Test 1:
+	 * veriyf that SIGURG is
+	 * delivered, 63 bytes are
+	 * read, oob is '@', and POLLPRI works.
+	 */
+	wait_for_data(pfd, POLLPRI);
+	read_oob(pfd, &oob);
+	len = read_data(pfd, buf, 1024);
+	if (!signal_recvd || len != 63 || oob != '@') {
+		fprintf(stderr, "Test 1 failed sigurg %d len %d %c\n",
+			 signal_recvd, len, oob);
+			die(1);
+	}
+
+	signal_recvd = 0;
+	signal_producer(pipefd[1]);
+
+	/* Test 2:
+	 * Verify that the first OOB is over written by
+	 * the 2nd one and the first OOB is returned as
+	 * part of the read, and sigurg is received.
+	 */
+	wait_for_data(pfd, POLLIN | POLLPRI);
+	len = 0;
+	while (len < 70)
+		len = recv(pfd, buf, 1024, MSG_PEEK);
+	len = read_data(pfd, buf, 1024);
+	read_oob(pfd, &oob);
+	if (!signal_recvd || len != 127 || oob != '#') {
+		fprintf(stderr, "Test 2 failed, sigurg %d len %d OOB %c\n",
+		signal_recvd, len, oob);
+		die(1);
+	}
+
+	signal_recvd = 0;
+	signal_producer(pipefd[1]);
+
+	/* Test 3:
+	 * verify that 2nd oob over writes
+	 * the first one and read breaks at
+	 * oob boundary returning 127 bytes
+	 * and sigurg is received and atmark
+	 * is set.
+	 * oob is '%' and second read returns
+	 * 64 bytes.
+	 */
+	len = 0;
+	wait_for_data(pfd, POLLIN | POLLPRI);
+	while (len < 150)
+		len = recv(pfd, buf, 1024, MSG_PEEK);
+	len = read_data(pfd, buf, 1024);
+	atmark = is_sioctatmark(pfd);
+	read_oob(pfd, &oob);
+
+	if (!signal_recvd || len != 127 || oob != '%' || atmark != 1) {
+		fprintf(stderr,
+			"Test 3 failed, sigurg %d len %d OOB %c atmark %d\n",
+			signal_recvd, len, oob, atmark);
+		die(1);
+	}
+
+	signal_recvd = 0;
+
+	len = read_data(pfd, buf, 1024);
+	if (len != 64) {
+		fprintf(stderr, "Test 3.1 failed, sigurg %d len %d OOB %c\n",
+			signal_recvd, len, oob);
+		die(1);
+	}
+
+	signal_recvd = 0;
+	signal_producer(pipefd[1]);
+
+	/* Test 4:
+	 * verify that a single byte
+	 * oob message is delivered.
+	 * set non blocking mode and
+	 * check proper error is
+	 * returned and sigurg is
+	 * received and correct
+	 * oob is read.
+	 */
+
+	set_filemode(pfd, 0);
+
+	wait_for_data(pfd, POLLIN | POLLPRI);
+	len = read_data(pfd, buf, 1024);
+	if ((len == -1) && (errno == 11))
+		len = 0;
+
+	read_oob(pfd, &oob);
+
+	if (!signal_recvd || len != 0 || oob != '@') {
+		fprintf(stderr, "Test 4 failed, sigurg %d len %d OOB %c\n",
+			 signal_recvd, len, oob);
+		die(1);
+	}
+
+	set_filemode(pfd, 1);
+
+	/* Inline Testing */
+
+	on = 1;
+	if (setsockopt(pfd, SOL_SOCKET, SO_OOBINLINE, &on, sizeof(on))) {
+		perror("SO_OOBINLINE");
+		die(1);
+	}
+
+	signal_recvd = 0;
+	signal_producer(pipefd[1]);
+
+	/* Test 1 -- Inline:
+	 * Check that SIGURG is
+	 * delivered and 63 bytes are
+	 * read and oob is '@'
+	 */
+
+	wait_for_data(pfd, POLLIN | POLLPRI);
+	len = read_data(pfd, buf, 1024);
+
+	if (!signal_recvd || len != 63) {
+		fprintf(stderr, "Test 1 Inline failed, sigurg %d len %d\n",
+			signal_recvd, len);
+		die(1);
+	}
+
+	len = read_data(pfd, buf, 1024);
+
+	if (len != 1) {
+		fprintf(stderr,
+			 "Test 1.1 Inline failed, sigurg %d len %d oob %c\n",
+			 signal_recvd, len, oob);
+		die(1);
+	}
+
+	signal_recvd = 0;
+	signal_producer(pipefd[1]);
+
+	/* Test 2 -- Inline:
+	 * Verify that the first OOB is over written by
+	 * the 2nd one and read breaks correctly on
+	 * 2nd OOB boundary with the first OOB returned as
+	 * part of the read, and sigurg is delivered and
+	 * siocatmark returns true.
+	 * next read returns one byte, the oob byte
+	 * and siocatmark returns false.
+	 */
+	len = 0;
+	wait_for_data(pfd, POLLIN | POLLPRI);
+	while (len < 70)
+		len = recv(pfd, buf, 1024, MSG_PEEK);
+	len = read_data(pfd, buf, 1024);
+	atmark = is_sioctatmark(pfd);
+	if (len != 127 || atmark != 1 || !signal_recvd) {
+		fprintf(stderr, "Test 2 Inline failed, len %d atmark %d\n",
+			 len, atmark);
+		die(1);
+	}
+
+	len = read_data(pfd, buf, 1024);
+	atmark = is_sioctatmark(pfd);
+	if (len != 1 || buf[0] != '#' || atmark == 1) {
+		fprintf(stderr, "Test 2.1 Inline failed, len %d data %c atmark %d\n",
+			len, buf[0], atmark);
+		die(1);
+	}
+
+	signal_recvd = 0;
+	signal_producer(pipefd[1]);
+
+	/* Test 3 -- Inline:
+	 * verify that 2nd oob over writes
+	 * the first one and read breaks at
+	 * oob boundary returning 127 bytes
+	 * and sigurg is received and siocatmark
+	 * is true after the read.
+	 * subsequent read returns 65 bytes
+	 * because of oob which should be '%'.
+	 */
+	len = 0;
+	wait_for_data(pfd, POLLIN | POLLPRI);
+	while (len < 126)
+		len = recv(pfd, buf, 1024, MSG_PEEK);
+	len = read_data(pfd, buf, 1024);
+	atmark = is_sioctatmark(pfd);
+	if (!signal_recvd || len != 127 || !atmark) {
+		fprintf(stderr,
+			 "Test 3 Inline failed, sigurg %d len %d data %c\n",
+			 signal_recvd, len, buf[0]);
+		die(1);
+	}
+
+	len = read_data(pfd, buf, 1024);
+	atmark = is_sioctatmark(pfd);
+	if (len != 65 || buf[0] != '%' || atmark != 0) {
+		fprintf(stderr,
+			 "Test 3.1 Inline failed, len %d oob %c atmark %d\n",
+			 len, buf[0], atmark);
+		die(1);
+	}
+
+	signal_recvd = 0;
+	signal_producer(pipefd[1]);
+
+	/* Test 4 -- Inline:
+	 * verify that a single
+	 * byte oob message is delivered
+	 * and read returns one byte, the oob
+	 * byte and sigurg is received
+	 */
+	wait_for_data(pfd, POLLIN | POLLPRI);
+	len = read_data(pfd, buf, 1024);
+	if (!signal_recvd || len != 1 || buf[0] != '@') {
+		fprintf(stderr,
+			"Test 4 Inline failed, signal %d len %d data %c\n",
+		signal_recvd, len, buf[0]);
+		die(1);
+	}
+	die(0);
+}
diff --git a/tools/testing/selftests/net/af_unix/unix_connect.c b/tools/testing/selftests/net/af_unix/unix_connect.c
new file mode 100644
index 000000000000..d799fd8f5c7c
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/unix_connect.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <sched.h>
+
+#include <stddef.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include "../../kselftest_harness.h"
+
+FIXTURE(unix_connect)
+{
+	int server, client;
+	int family;
+};
+
+FIXTURE_VARIANT(unix_connect)
+{
+	int type;
+	char sun_path[8];
+	int len;
+	int flags;
+	int err;
+};
+
+FIXTURE_VARIANT_ADD(unix_connect, stream_pathname)
+{
+	.type = SOCK_STREAM,
+	.sun_path = "test",
+	.len = 4 + 1,
+	.flags = 0,
+	.err = 0,
+};
+
+FIXTURE_VARIANT_ADD(unix_connect, stream_abstract)
+{
+	.type = SOCK_STREAM,
+	.sun_path = "\0test",
+	.len = 5,
+	.flags = 0,
+	.err = 0,
+};
+
+FIXTURE_VARIANT_ADD(unix_connect, stream_pathname_netns)
+{
+	.type = SOCK_STREAM,
+	.sun_path = "test",
+	.len = 4 + 1,
+	.flags = CLONE_NEWNET,
+	.err = 0,
+};
+
+FIXTURE_VARIANT_ADD(unix_connect, stream_abstract_netns)
+{
+	.type = SOCK_STREAM,
+	.sun_path = "\0test",
+	.len = 5,
+	.flags = CLONE_NEWNET,
+	.err = ECONNREFUSED,
+};
+
+FIXTURE_VARIANT_ADD(unix_connect, dgram_pathname)
+{
+	.type = SOCK_DGRAM,
+	.sun_path = "test",
+	.len = 4 + 1,
+	.flags = 0,
+	.err = 0,
+};
+
+FIXTURE_VARIANT_ADD(unix_connect, dgram_abstract)
+{
+	.type = SOCK_DGRAM,
+	.sun_path = "\0test",
+	.len = 5,
+	.flags = 0,
+	.err = 0,
+};
+
+FIXTURE_VARIANT_ADD(unix_connect, dgram_pathname_netns)
+{
+	.type = SOCK_DGRAM,
+	.sun_path = "test",
+	.len = 4 + 1,
+	.flags = CLONE_NEWNET,
+	.err = 0,
+};
+
+FIXTURE_VARIANT_ADD(unix_connect, dgram_abstract_netns)
+{
+	.type = SOCK_DGRAM,
+	.sun_path = "\0test",
+	.len = 5,
+	.flags = CLONE_NEWNET,
+	.err = ECONNREFUSED,
+};
+
+FIXTURE_SETUP(unix_connect)
+{
+	self->family = AF_UNIX;
+}
+
+FIXTURE_TEARDOWN(unix_connect)
+{
+	close(self->server);
+	close(self->client);
+
+	if (variant->sun_path[0])
+		remove("test");
+}
+
+TEST_F(unix_connect, test)
+{
+	socklen_t addrlen;
+	struct sockaddr_un addr = {
+		.sun_family = self->family,
+	};
+	int err;
+
+	self->server = socket(self->family, variant->type, 0);
+	ASSERT_NE(-1, self->server);
+
+	addrlen = offsetof(struct sockaddr_un, sun_path) + variant->len;
+	memcpy(&addr.sun_path, variant->sun_path, variant->len);
+
+	err = bind(self->server, (struct sockaddr *)&addr, addrlen);
+	ASSERT_EQ(0, err);
+
+	if (variant->type == SOCK_STREAM) {
+		err = listen(self->server, 32);
+		ASSERT_EQ(0, err);
+	}
+
+	err = unshare(variant->flags);
+	ASSERT_EQ(0, err);
+
+	self->client = socket(self->family, variant->type, 0);
+	ASSERT_LT(0, self->client);
+
+	err = connect(self->client, (struct sockaddr *)&addr, addrlen);
+	ASSERT_EQ(variant->err, err == -1 ? errno : 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/altnames.sh b/tools/testing/selftests/net/altnames.sh
index 4254ddc3f70b..1ef9e4159bba 100755
--- a/tools/testing/selftests/net/altnames.sh
+++ b/tools/testing/selftests/net/altnames.sh
@@ -45,7 +45,7 @@ altnames_test()
 	check_err $? "Got unexpected long alternative name from link show JSON"
 
 	ip link property del $DUMMY_DEV altname $SHORT_NAME
-	check_err $? "Failed to add short alternative name"
+	check_err $? "Failed to delete short alternative name"
 
 	ip -j -p link show $SHORT_NAME &>/dev/null
 	check_fail $? "Unexpected success while trying to do link show with deleted short alternative name"
diff --git a/tools/testing/selftests/net/amt.sh b/tools/testing/selftests/net/amt.sh
new file mode 100755
index 000000000000..75528788cb95
--- /dev/null
+++ b/tools/testing/selftests/net/amt.sh
@@ -0,0 +1,284 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+# Author: Taehee Yoo <ap420073@gmail.com>
+#
+# This script evaluates the AMT driver.
+# There are four network-namespaces, LISTENER, SOURCE, GATEWAY, RELAY.
+# The role of LISTENER is to listen multicast traffic.
+# In order to do that, it send IGMP group join message.
+# The role of SOURCE is to send multicast traffic to listener.
+# The role of GATEWAY is to work Gateway role of AMT interface.
+# The role of RELAY is to work Relay role of AMT interface.
+#
+#
+#       +------------------------+
+#       |    LISTENER netns      |
+#       |                        |
+#       |  +------------------+  |
+#       |  |       l_gw       |  |
+#       |  |  192.168.0.2/24  |  |
+#       |  |  2001:db8::2/64  |  |
+#       |  +------------------+  |
+#       |            .           |
+#       +------------------------+
+#                    .
+#                    .
+#       +-----------------------------------------------------+
+#       |            .         GATEWAY netns                  |
+#       |            .                                        |
+#       |+---------------------------------------------------+|
+#       ||           .          br0                          ||
+#       || +------------------+       +------------------+   ||
+#       || |       gw_l       |       |       amtg       |   ||
+#       || |  192.168.0.1/24  |       +--------+---------+   ||
+#       || |  2001:db8::1/64  |                |             ||
+#       || +------------------+                |             ||
+#       |+-------------------------------------|-------------+|
+#       |                                      |              |
+#       |                             +--------+---------+    |
+#       |                             |     gw_relay     |    |
+#       |                             |    10.0.0.1/24   |    |
+#       |                             +------------------+    |
+#       |                                      .              |
+#       +-----------------------------------------------------+
+#                                              .
+#                                              .
+#       +-----------------------------------------------------+
+#       |                       RELAY netns    .              |
+#       |                             +------------------+    |
+#       |                             |     relay_gw     |    |
+#       |                             |    10.0.0.2/24   |    |
+#       |                             +--------+---------+    |
+#       |                                      |              |
+#       |                                      |              |
+#       |  +------------------+       +--------+---------+    |
+#       |  |     relay_src    |       |       amtr       |    |
+#       |  |   172.17.0.1/24  |       +------------------+    |
+#       |  | 2001:db8:3::1/64 |                               |
+#       |  +------------------+                               |
+#       |            .                                        |
+#       |            .                                        |
+#       +-----------------------------------------------------+
+#                    .
+#                    .
+#       +------------------------+
+#       |            .           |
+#       |  +------------------+  |
+#       |  |     src_relay    |  |
+#       |  |   172.17.0.2/24  |  |
+#       |  | 2001:db8:3::2/64 |  |
+#       |  +------------------+  |
+#       |      SOURCE netns      |
+#       +------------------------+
+#==============================================================================
+
+readonly LISTENER=$(mktemp -u listener-XXXXXXXX)
+readonly GATEWAY=$(mktemp -u gateway-XXXXXXXX)
+readonly RELAY=$(mktemp -u relay-XXXXXXXX)
+readonly SOURCE=$(mktemp -u source-XXXXXXXX)
+ERR=4
+err=0
+
+exit_cleanup()
+{
+	for ns in "$@"; do
+		ip netns delete "${ns}" 2>/dev/null || true
+	done
+
+	exit $ERR
+}
+
+create_namespaces()
+{
+	ip netns add "${LISTENER}" || exit_cleanup
+	ip netns add "${GATEWAY}" || exit_cleanup "${LISTENER}"
+	ip netns add "${RELAY}" || exit_cleanup "${LISTENER}" "${GATEWAY}"
+	ip netns add "${SOURCE}" || exit_cleanup "${LISTENER}" "${GATEWAY}" \
+		"${RELAY}"
+}
+
+# The trap function handler
+#
+exit_cleanup_all()
+{
+	exit_cleanup "${LISTENER}" "${GATEWAY}" "${RELAY}" "${SOURCE}"
+}
+
+setup_interface()
+{
+	for ns in "${LISTENER}" "${GATEWAY}" "${RELAY}" "${SOURCE}"; do
+		ip -netns "${ns}" link set dev lo up
+	done;
+
+	ip link add l_gw type veth peer name gw_l
+	ip link add gw_relay type veth peer name relay_gw
+	ip link add relay_src type veth peer name src_relay
+
+	ip link set l_gw netns "${LISTENER}" up
+	ip link set gw_l netns "${GATEWAY}" up
+	ip link set gw_relay netns "${GATEWAY}" up
+	ip link set relay_gw netns "${RELAY}" up
+	ip link set relay_src netns "${RELAY}" up
+	ip link set src_relay netns "${SOURCE}" up mtu 1400
+
+	ip netns exec "${LISTENER}" ip a a 192.168.0.2/24 dev l_gw
+	ip netns exec "${LISTENER}" ip r a default via 192.168.0.1 dev l_gw
+	ip netns exec "${LISTENER}" ip a a 2001:db8::2/64 dev l_gw
+	ip netns exec "${LISTENER}" ip r a default via 2001:db8::1 dev l_gw
+	ip netns exec "${LISTENER}" ip a a 239.0.0.1/32 dev l_gw autojoin
+	ip netns exec "${LISTENER}" ip a a ff0e::5:6/128 dev l_gw autojoin
+
+	ip netns exec "${GATEWAY}" ip a a 192.168.0.1/24 dev gw_l
+	ip netns exec "${GATEWAY}" ip a a 2001:db8::1/64 dev gw_l
+	ip netns exec "${GATEWAY}" ip a a 10.0.0.1/24 dev gw_relay
+	ip netns exec "${GATEWAY}" ip link add br0 type bridge
+	ip netns exec "${GATEWAY}" ip link set br0 up
+	ip netns exec "${GATEWAY}" ip link set gw_l master br0
+	ip netns exec "${GATEWAY}" ip link set gw_l up
+	ip netns exec "${GATEWAY}" ip link add amtg master br0 type amt \
+		mode gateway local 10.0.0.1 discovery 10.0.0.2 dev gw_relay \
+		gateway_port 2268 relay_port 2268
+	ip netns exec "${RELAY}" ip a a 10.0.0.2/24 dev relay_gw
+	ip netns exec "${RELAY}" ip link add amtr type amt mode relay \
+		local 10.0.0.2 dev relay_gw relay_port 2268 max_tunnels 4
+	ip netns exec "${RELAY}" ip a a 172.17.0.1/24 dev relay_src
+	ip netns exec "${RELAY}" ip a a 2001:db8:3::1/64 dev relay_src
+	ip netns exec "${SOURCE}" ip a a 172.17.0.2/24 dev src_relay
+	ip netns exec "${SOURCE}" ip a a 2001:db8:3::2/64 dev src_relay
+	ip netns exec "${SOURCE}" ip r a default via 172.17.0.1 dev src_relay
+	ip netns exec "${SOURCE}" ip r a default via 2001:db8:3::1 dev src_relay
+	ip netns exec "${RELAY}" ip link set amtr up
+	ip netns exec "${GATEWAY}" ip link set amtg up
+}
+
+setup_sysctl()
+{
+	ip netns exec "${RELAY}" sysctl net.ipv4.ip_forward=1 -w -q
+}
+
+setup_iptables()
+{
+	ip netns exec "${RELAY}" iptables -t mangle -I PREROUTING \
+		-d 239.0.0.1 -j TTL --ttl-set 2
+	ip netns exec "${RELAY}" ip6tables -t mangle -I PREROUTING \
+		-j HL --hl-set 2
+}
+
+setup_mcast_routing()
+{
+	ip netns exec "${RELAY}" smcrouted
+	ip netns exec "${RELAY}" smcroutectl a relay_src \
+		172.17.0.2 239.0.0.1 amtr
+	ip netns exec "${RELAY}" smcroutectl a relay_src \
+		2001:db8:3::2 ff0e::5:6 amtr
+}
+
+test_remote_ip()
+{
+	REMOTE=$(ip netns exec "${GATEWAY}" \
+		ip -d -j link show amtg | jq .[0].linkinfo.info_data.remote)
+	if [ $REMOTE == "\"10.0.0.2\"" ]; then
+		printf "TEST: %-60s  [ OK ]\n" "amt discovery"
+	else
+		printf "TEST: %-60s  [FAIL]\n" "amt discovery"
+		ERR=1
+	fi
+}
+
+send_mcast_torture4()
+{
+	ip netns exec "${SOURCE}" bash -c \
+		'cat /dev/urandom | head -c 1G | nc -w 1 -u 239.0.0.1 4001'
+}
+
+
+send_mcast_torture6()
+{
+	ip netns exec "${SOURCE}" bash -c \
+		'cat /dev/urandom | head -c 1G | nc -w 1 -u ff0e::5:6 6001'
+}
+
+check_features()
+{
+        ip link help 2>&1 | grep -q amt
+        if [ $? -ne 0 ]; then
+                echo "Missing amt support in iproute2" >&2
+                exit_cleanup
+        fi
+}
+
+test_ipv4_forward()
+{
+	RESULT4=$(ip netns exec "${LISTENER}" nc -w 1 -l -u 239.0.0.1 4000)
+	if [ "$RESULT4" == "172.17.0.2" ]; then
+		printf "TEST: %-60s  [ OK ]\n" "IPv4 amt multicast forwarding"
+		exit 0
+	else
+		printf "TEST: %-60s  [FAIL]\n" "IPv4 amt multicast forwarding"
+		exit 1
+	fi
+}
+
+test_ipv6_forward()
+{
+	RESULT6=$(ip netns exec "${LISTENER}" nc -w 1 -l -u ff0e::5:6 6000)
+	if [ "$RESULT6" == "2001:db8:3::2" ]; then
+		printf "TEST: %-60s  [ OK ]\n" "IPv6 amt multicast forwarding"
+		exit 0
+	else
+		printf "TEST: %-60s  [FAIL]\n" "IPv6 amt multicast forwarding"
+		exit 1
+	fi
+}
+
+send_mcast4()
+{
+	sleep 2
+	ip netns exec "${SOURCE}" bash -c \
+		'echo 172.17.0.2 | nc -w 1 -u 239.0.0.1 4000' &
+}
+
+send_mcast6()
+{
+	sleep 2
+	ip netns exec "${SOURCE}" bash -c \
+		'echo 2001:db8:3::2 | nc -w 1 -u ff0e::5:6 6000' &
+}
+
+check_features
+
+create_namespaces
+
+set -e
+trap exit_cleanup_all EXIT
+
+setup_interface
+setup_sysctl
+setup_iptables
+setup_mcast_routing
+test_remote_ip
+test_ipv4_forward &
+pid=$!
+send_mcast4
+wait $pid || err=$?
+if [ $err -eq 1 ]; then
+	ERR=1
+fi
+test_ipv6_forward &
+pid=$!
+send_mcast6
+wait $pid || err=$?
+if [ $err -eq 1 ]; then
+	ERR=1
+fi
+send_mcast_torture4
+printf "TEST: %-60s  [ OK ]\n" "IPv4 amt traffic forwarding torture"
+send_mcast_torture6
+printf "TEST: %-60s  [ OK ]\n" "IPv6 amt traffic forwarding torture"
+sleep 5
+if [ "${ERR}" -eq 1 ]; then
+        echo "Some tests failed." >&2
+else
+        ERR=0
+fi
diff --git a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh
new file mode 100755
index 000000000000..92eb880c52f2
--- /dev/null
+++ b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh
@@ -0,0 +1,213 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Tests sysctl options {arp,ndisc}_evict_nocarrier={0,1}
+#
+# Create a veth pair and set IPs/routes on both. Then ping to establish
+# an entry in the ARP/ND table. Depending on the test set sysctl option to
+# 1 or 0. Set remote veth down which will cause local veth to go into a no
+# carrier state. Depending on the test check the ARP/ND table:
+#
+# {arp,ndisc}_evict_nocarrier=1 should contain no ARP/ND after no carrier
+# {arp,ndisc}_evict_nocarrer=0 should still contain the single ARP/ND entry
+#
+
+source lib.sh
+
+readonly V4_ADDR0=10.0.10.1
+readonly V4_ADDR1=10.0.10.2
+readonly V6_ADDR0=2001:db8:91::1
+readonly V6_ADDR1=2001:db8:91::2
+nsid=100
+ret=0
+
+cleanup_v6()
+{
+    cleanup_ns ${me} ${peer}
+
+    sysctl -w net.ipv6.conf.veth1.ndisc_evict_nocarrier=1 >/dev/null 2>&1
+    sysctl -w net.ipv6.conf.all.ndisc_evict_nocarrier=1 >/dev/null 2>&1
+}
+
+setup_v6() {
+    setup_ns me peer
+
+    IP="ip -netns ${me}"
+
+    $IP li add veth1 type veth peer name veth2
+    $IP li set veth1 up
+    $IP -6 addr add $V6_ADDR0/64 dev veth1 nodad
+    $IP li set veth2 netns ${peer} up
+    ip -netns ${peer} -6 addr add $V6_ADDR1/64 dev veth2 nodad
+
+    ip netns exec ${me} sysctl -w $1 >/dev/null 2>&1
+
+    # Establish an ND cache entry
+    ip netns exec ${me} ping -6 -c1 -Iveth1 $V6_ADDR1 >/dev/null 2>&1
+    # Should have the veth1 entry in ND table
+    ip netns exec ${me} ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1
+    if [ $? -ne 0 ]; then
+        cleanup_v6
+        echo "failed"
+        exit 1
+    fi
+
+    # Set veth2 down, which will put veth1 in NOCARRIER state
+    ip netns exec ${peer} ip link set veth2 down
+}
+
+setup_v4() {
+    setup_ns PEER_NS
+    ip link add name veth0 type veth peer name veth1
+    ip link set dev veth0 up
+    ip link set dev veth1 netns "${PEER_NS}"
+    ip netns exec "${PEER_NS}" ip link set dev veth1 up
+    ip addr add $V4_ADDR0/24 dev veth0
+    ip netns exec "${PEER_NS}" ip addr add $V4_ADDR1/24 dev veth1
+    ip netns exec ${PEER_NS} ip route add default via $V4_ADDR1 dev veth1
+    ip route add default via $V4_ADDR0 dev veth0
+
+    sysctl -w "$1" >/dev/null 2>&1
+
+    # Establish an ARP cache entry
+    ping -c1 -I veth0 $V4_ADDR1 -q >/dev/null 2>&1
+    # Should have the veth1 entry in ARP table
+    ip neigh get $V4_ADDR1 dev veth0 >/dev/null 2>&1
+    if [ $? -ne 0 ]; then
+        cleanup_v4
+        echo "failed"
+        exit 1
+    fi
+
+    # Set veth1 down, which will put veth0 in NOCARRIER state
+    ip netns exec "${PEER_NS}" ip link set veth1 down
+}
+
+cleanup_v4() {
+    ip neigh flush dev veth0
+    ip link del veth0
+    cleanup_ns $PEER_NS
+
+    sysctl -w net.ipv4.conf.veth0.arp_evict_nocarrier=1 >/dev/null 2>&1
+    sysctl -w net.ipv4.conf.all.arp_evict_nocarrier=1 >/dev/null 2>&1
+}
+
+# Run test when arp_evict_nocarrier = 1 (default).
+run_arp_evict_nocarrier_enabled() {
+    echo "run arp_evict_nocarrier=1 test"
+    setup_v4 "net.ipv4.conf.veth0.arp_evict_nocarrier=1"
+
+    # ARP table should be empty
+    ip neigh get $V4_ADDR1 dev veth0 >/dev/null 2>&1
+
+    if [ $? -eq 0 ];then
+        echo "failed"
+        ret=1
+    else
+        echo "ok"
+    fi
+
+    cleanup_v4
+}
+
+# Run test when arp_evict_nocarrier = 0
+run_arp_evict_nocarrier_disabled() {
+    echo "run arp_evict_nocarrier=0 test"
+    setup_v4 "net.ipv4.conf.veth0.arp_evict_nocarrier=0"
+
+    # ARP table should still contain the entry
+    ip neigh get $V4_ADDR1 dev veth0 >/dev/null 2>&1
+
+    if [ $? -eq 0 ];then
+        echo "ok"
+    else
+        echo "failed"
+        ret=1
+    fi
+
+    cleanup_v4
+}
+
+run_arp_evict_nocarrier_disabled_all() {
+    echo "run all.arp_evict_nocarrier=0 test"
+    setup_v4 "net.ipv4.conf.all.arp_evict_nocarrier=0"
+
+    # ARP table should still contain the entry
+    ip neigh get $V4_ADDR1 dev veth0 >/dev/null 2>&1
+
+    if [ $? -eq 0 ];then
+        echo "ok"
+    else
+        echo "failed"
+    fi
+
+    cleanup_v4
+}
+
+run_ndisc_evict_nocarrier_enabled() {
+    echo "run ndisc_evict_nocarrier=1 test"
+
+    setup_v6 "net.ipv6.conf.veth1.ndisc_evict_nocarrier=1"
+
+    ip netns exec ${me} ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1
+
+    if [ $? -eq 0 ];then
+        echo "failed"
+        ret=1
+    else
+        echo "ok"
+    fi
+
+    cleanup_v6
+}
+
+run_ndisc_evict_nocarrier_disabled() {
+    echo "run ndisc_evict_nocarrier=0 test"
+
+    setup_v6 "net.ipv6.conf.veth1.ndisc_evict_nocarrier=0"
+
+    ip netns exec ${me} ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1
+
+    if [ $? -eq 0 ];then
+        echo "ok"
+    else
+        echo "failed"
+        ret=1
+    fi
+
+    cleanup_v6
+}
+
+run_ndisc_evict_nocarrier_disabled_all() {
+    echo "run all.ndisc_evict_nocarrier=0 test"
+
+    setup_v6 "net.ipv6.conf.all.ndisc_evict_nocarrier=0"
+
+    ip netns exec ${me} ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1
+
+    if [ $? -eq 0 ];then
+        echo "ok"
+    else
+        echo "failed"
+        ret=1
+    fi
+
+    cleanup_v6
+}
+
+run_all_tests() {
+    run_arp_evict_nocarrier_enabled
+    run_arp_evict_nocarrier_disabled
+    run_arp_evict_nocarrier_disabled_all
+    run_ndisc_evict_nocarrier_enabled
+    run_ndisc_evict_nocarrier_disabled
+    run_ndisc_evict_nocarrier_disabled_all
+}
+
+if [ "$(id -u)" -ne 0 ];then
+	echo "SKIP: Need root privileges"
+	exit $ksft_skip;
+fi
+
+run_all_tests
+exit $ret
diff --git a/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh b/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh
new file mode 100755
index 000000000000..a40c0e9bd023
--- /dev/null
+++ b/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh
@@ -0,0 +1,302 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# 2 namespaces: one host and one router. Use arping from the host to send a
+# garp to the router. Router accepts or ignores based on its arp_accept
+# or accept_untracked_na configuration.
+
+source lib.sh
+
+TESTS="arp ndisc"
+
+ROUTER_INTF="veth-router"
+ROUTER_ADDR="10.0.10.1"
+ROUTER_ADDR_V6="2001:db8:abcd:0012::1"
+
+HOST_INTF="veth-host"
+HOST_ADDR="10.0.10.2"
+HOST_ADDR_V6="2001:db8:abcd:0012::2"
+
+SUBNET_WIDTH=24
+PREFIX_WIDTH_V6=64
+
+cleanup() {
+	cleanup_ns ${HOST_NS} ${ROUTER_NS}
+}
+
+cleanup_v6() {
+	cleanup_ns ${HOST_NS_V6} ${ROUTER_NS_V6}
+}
+
+setup() {
+	set -e
+	local arp_accept=$1
+
+	# Set up two namespaces
+	setup_ns HOST_NS ROUTER_NS
+
+	# Set up interfaces veth0 and veth1, which are pairs in separate
+	# namespaces. veth0 is veth-router, veth1 is veth-host.
+	# first, set up the inteface's link to the namespace
+	# then, set the interface "up"
+	ip netns exec ${ROUTER_NS} ip link add name ${ROUTER_INTF} \
+		type veth peer name ${HOST_INTF}
+
+	ip netns exec ${ROUTER_NS} ip link set dev ${ROUTER_INTF} up
+	ip netns exec ${ROUTER_NS} ip link set dev ${HOST_INTF} netns ${HOST_NS}
+
+	ip netns exec ${HOST_NS} ip link set dev ${HOST_INTF} up
+	ip netns exec ${ROUTER_NS} ip addr add ${ROUTER_ADDR}/${SUBNET_WIDTH} \
+		dev ${ROUTER_INTF}
+
+	ip netns exec ${HOST_NS} ip addr add ${HOST_ADDR}/${SUBNET_WIDTH} \
+		dev ${HOST_INTF}
+	ip netns exec ${HOST_NS} ip route add default via ${HOST_ADDR} \
+		dev ${HOST_INTF}
+	ip netns exec ${ROUTER_NS} ip route add default via ${ROUTER_ADDR} \
+		dev ${ROUTER_INTF}
+
+	ROUTER_CONF=net.ipv4.conf.${ROUTER_INTF}
+	ip netns exec ${ROUTER_NS} sysctl -w \
+		${ROUTER_CONF}.arp_accept=${arp_accept} >/dev/null 2>&1
+	set +e
+}
+
+setup_v6() {
+	set -e
+	local accept_untracked_na=$1
+
+	# Set up two namespaces
+	setup_ns HOST_NS_V6 ROUTER_NS_V6
+
+	# Set up interfaces veth0 and veth1, which are pairs in separate
+	# namespaces. veth0 is veth-router, veth1 is veth-host.
+	# first, set up the inteface's link to the namespace
+	# then, set the interface "up"
+	ip -6 -netns ${ROUTER_NS_V6} link add name ${ROUTER_INTF} \
+		type veth peer name ${HOST_INTF}
+
+	ip -6 -netns ${ROUTER_NS_V6} link set dev ${ROUTER_INTF} up
+	ip -6 -netns ${ROUTER_NS_V6} link set dev ${HOST_INTF} netns \
+		${HOST_NS_V6}
+
+	ip -6 -netns ${HOST_NS_V6} link set dev ${HOST_INTF} up
+	ip -6 -netns ${ROUTER_NS_V6} addr add \
+		${ROUTER_ADDR_V6}/${PREFIX_WIDTH_V6} dev ${ROUTER_INTF} nodad
+
+	HOST_CONF=net.ipv6.conf.${HOST_INTF}
+	ip netns exec ${HOST_NS_V6} sysctl -qw ${HOST_CONF}.ndisc_notify=1
+	ip netns exec ${HOST_NS_V6} sysctl -qw ${HOST_CONF}.disable_ipv6=0
+	ip -6 -netns ${HOST_NS_V6} addr add ${HOST_ADDR_V6}/${PREFIX_WIDTH_V6} \
+		dev ${HOST_INTF}
+
+	ROUTER_CONF=net.ipv6.conf.${ROUTER_INTF}
+
+	ip netns exec ${ROUTER_NS_V6} sysctl -w \
+		${ROUTER_CONF}.forwarding=1 >/dev/null 2>&1
+	ip netns exec ${ROUTER_NS_V6} sysctl -w \
+		${ROUTER_CONF}.drop_unsolicited_na=0 >/dev/null 2>&1
+	ip netns exec ${ROUTER_NS_V6} sysctl -w \
+		${ROUTER_CONF}.accept_untracked_na=${accept_untracked_na} \
+		>/dev/null 2>&1
+	set +e
+}
+
+verify_arp() {
+	local arp_accept=$1
+	local same_subnet=$2
+
+	neigh_show_output=$(ip netns exec ${ROUTER_NS} ip neigh get \
+		${HOST_ADDR} dev ${ROUTER_INTF} 2>/dev/null)
+
+	if [ ${arp_accept} -eq 1 ]; then
+		# Neighbor entries expected
+		[[ ${neigh_show_output} ]]
+	elif [ ${arp_accept} -eq 2 ]; then
+		if [ ${same_subnet} -eq 1 ]; then
+			# Neighbor entries expected
+			[[ ${neigh_show_output} ]]
+		else
+			[[ -z "${neigh_show_output}" ]]
+		fi
+	else
+		[[ -z "${neigh_show_output}" ]]
+	fi
+ }
+
+arp_test_gratuitous() {
+	set -e
+	local arp_accept=$1
+	local same_subnet=$2
+
+	if [ ${arp_accept} -eq 2 ]; then
+		test_msg=("test_arp: "
+			  "accept_arp=$1 "
+			  "same_subnet=$2")
+		if [ ${same_subnet} -eq 0 ]; then
+			HOST_ADDR=10.0.11.3
+		else
+			HOST_ADDR=10.0.10.3
+		fi
+	else
+		test_msg=("test_arp: "
+			  "accept_arp=$1")
+	fi
+	# Supply arp_accept option to set up which sets it in sysctl
+	setup ${arp_accept}
+	ip netns exec ${HOST_NS} arping -A -I ${HOST_INTF} -U ${HOST_ADDR} -c1 2>&1 >/dev/null
+
+	if verify_arp $1 $2; then
+		printf "    TEST: %-60s  [ OK ]\n" "${test_msg[*]}"
+	else
+		printf "    TEST: %-60s  [FAIL]\n" "${test_msg[*]}"
+	fi
+	cleanup
+	set +e
+}
+
+arp_test_gratuitous_combinations() {
+	arp_test_gratuitous 0
+	arp_test_gratuitous 1
+	arp_test_gratuitous 2 0 # Second entry indicates subnet or not
+	arp_test_gratuitous 2 1
+}
+
+cleanup_tcpdump() {
+	set -e
+	[[ ! -z  ${tcpdump_stdout} ]] && rm -f ${tcpdump_stdout}
+	[[ ! -z  ${tcpdump_stderr} ]] && rm -f ${tcpdump_stderr}
+	tcpdump_stdout=
+	tcpdump_stderr=
+	set +e
+}
+
+start_tcpdump() {
+	set -e
+	tcpdump_stdout=`mktemp`
+	tcpdump_stderr=`mktemp`
+	ip netns exec ${ROUTER_NS_V6} timeout 15s \
+		tcpdump --immediate-mode -tpni ${ROUTER_INTF} -c 1 \
+		"icmp6 && icmp6[0] == 136 && src ${HOST_ADDR_V6}" \
+		> ${tcpdump_stdout} 2> /dev/null
+	set +e
+}
+
+verify_ndisc() {
+	local accept_untracked_na=$1
+	local same_subnet=$2
+
+	neigh_show_output=$(ip -6 -netns ${ROUTER_NS_V6} neigh show \
+		to ${HOST_ADDR_V6} dev ${ROUTER_INTF} nud stale)
+
+	if [ ${accept_untracked_na} -eq 1 ]; then
+		# Neighbour entry expected to be present
+		[[ ${neigh_show_output} ]]
+	elif [ ${accept_untracked_na} -eq 2 ]; then
+		if [ ${same_subnet} -eq 1 ]; then
+			[[ ${neigh_show_output} ]]
+		else
+			[[ -z "${neigh_show_output}" ]]
+		fi
+	else
+		# Neighbour entry expected to be absent for all other cases
+		[[ -z "${neigh_show_output}" ]]
+	fi
+}
+
+ndisc_test_untracked_advertisements() {
+	set -e
+	test_msg=("test_ndisc: "
+		  "accept_untracked_na=$1")
+
+	local accept_untracked_na=$1
+	local same_subnet=$2
+	if [ ${accept_untracked_na} -eq 2 ]; then
+		test_msg=("test_ndisc: "
+			  "accept_untracked_na=$1 "
+			  "same_subnet=$2")
+		if [ ${same_subnet} -eq 0 ]; then
+			# Not same subnet
+			HOST_ADDR_V6=2000:db8:abcd:0013::4
+		else
+			HOST_ADDR_V6=2001:db8:abcd:0012::3
+		fi
+	fi
+	setup_v6 $1 $2
+	start_tcpdump
+
+	if verify_ndisc $1 $2; then
+		printf "    TEST: %-60s  [ OK ]\n" "${test_msg[*]}"
+	else
+		printf "    TEST: %-60s  [FAIL]\n" "${test_msg[*]}"
+	fi
+
+	cleanup_tcpdump
+	cleanup_v6
+	set +e
+}
+
+ndisc_test_untracked_combinations() {
+	ndisc_test_untracked_advertisements 0
+	ndisc_test_untracked_advertisements 1
+	ndisc_test_untracked_advertisements 2 0
+	ndisc_test_untracked_advertisements 2 1
+}
+
+################################################################################
+# usage
+
+usage()
+{
+	cat <<EOF
+usage: ${0##*/} OPTS
+
+	-t <test>       Test(s) to run (default: all)
+			(options: $TESTS)
+EOF
+}
+
+################################################################################
+# main
+
+while getopts ":t:h" opt; do
+	case $opt in
+		t) TESTS=$OPTARG;;
+		h) usage; exit 0;;
+		*) usage; exit 1;;
+	esac
+done
+
+if [ "$(id -u)" -ne 0 ];then
+	echo "SKIP: Need root privileges"
+	exit $ksft_skip;
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v tcpdump)" ]; then
+	echo "SKIP: Could not run test without tcpdump tool"
+	exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v arping)" ]; then
+	echo "SKIP: Could not run test without arping tool"
+	exit $ksft_skip
+fi
+
+# start clean
+cleanup &> /dev/null
+cleanup_v6 &> /dev/null
+
+for t in $TESTS
+do
+	case $t in
+	arp_test_gratuitous_combinations|arp) arp_test_gratuitous_combinations;;
+	ndisc_test_untracked_combinations|ndisc) \
+		ndisc_test_untracked_combinations;;
+	help) echo "Test names: $TESTS"; exit 0;;
+esac
+done
diff --git a/tools/testing/selftests/net/bareudp.sh b/tools/testing/selftests/net/bareudp.sh
new file mode 100755
index 000000000000..f366cadbc5e8
--- /dev/null
+++ b/tools/testing/selftests/net/bareudp.sh
@@ -0,0 +1,546 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+# Test various bareudp tunnel configurations.
+#
+# The bareudp module allows to tunnel network protocols like IP or MPLS over
+# UDP, without adding any intermediate header. This scripts tests several
+# configurations of bareudp (using IPv4 or IPv6 as underlay and transporting
+# IPv4, IPv6 or MPLS packets on the overlay).
+#
+# Network topology:
+#
+#   * A chain of 4 network namespaces, connected with veth pairs. Each veth
+#     is assigned an IPv4 and an IPv6 address. A host-route allows a veth to
+#     join its peer.
+#
+#   * NS0 and NS3 are at the extremities of the chain. They have additional
+#     IPv4 and IPv6 addresses on their loopback device. Routes are added in NS0
+#     and NS3, so that they can communicate using these overlay IP addresses.
+#     For IPv4 and IPv6 reachability tests, the route simply sets the peer's
+#     veth address as gateway. For MPLS reachability tests, an MPLS header is
+#     also pushed before the IP header.
+#
+#   * NS1 and NS2 are the intermediate namespaces. They use a bareudp device to
+#     encapsulate the traffic into UDP.
+#
+# +-----------------------------------------------------------------------+
+# |                                  NS0                                  |
+# |                                                                       |
+# |   lo:                                                                 |
+# |      * IPv4 address: 192.0.2.100/32                                   |
+# |      * IPv6 address: 2001:db8::100/128                                |
+# |      * IPv6 address: 2001:db8::200/128                                |
+# |      * IPv4 route: 192.0.2.103/32 reachable via 192.0.2.11            |
+# |      * IPv6 route: 2001:db8::103/128 reachable via 2001:db8::11       |
+# |      * IPv6 route: 2001:db8::203/128 reachable via 2001:db8::11       |
+# |                    (encapsulated with MPLS label 203)                 |
+# |                                                                       |
+# |   veth01:                                                             |
+# |   ^  * IPv4 address: 192.0.2.10, peer 192.0.2.11/32                   |
+# |   |  * IPv6 address: 2001:db8::10, peer 2001:db8::11/128              |
+# |   |                                                                   |
+# +---+-------------------------------------------------------------------+
+#     |
+#     | Traffic type: IP or MPLS (depending on test)
+#     |
+# +---+-------------------------------------------------------------------+
+# |   |                              NS1                                  |
+# |   |                                                                   |
+# |   v                                                                   |
+# |   veth10:                                                             |
+# |      * IPv4 address: 192.0.2.11, peer 192.0.2.10/32                   |
+# |      * IPv6 address: 2001:db8::11, peer 2001:db8::10/128              |
+# |                                                                       |
+# |   bareudp_ns1:                                                        |
+# |      * Encapsulate IP or MPLS packets received on veth10 into UDP     |
+# |        and send the resulting packets through veth12.                 |
+# |      * Decapsulate bareudp packets (either IP or MPLS, over UDP)      |
+# |        received on veth12 and send the inner packets through veth10.  |
+# |                                                                       |
+# |   veth12:                                                             |
+# |   ^  * IPv4 address: 192.0.2.21, peer 192.0.2.22/32                   |
+# |   |  * IPv6 address: 2001:db8::21, peer 2001:db8::22/128              |
+# |   |                                                                   |
+# +---+-------------------------------------------------------------------+
+#     |
+#     | Traffic type: IP or MPLS (depending on test), over UDP
+#     |
+# +---+-------------------------------------------------------------------+
+# |   |                              NS2                                  |
+# |   |                                                                   |
+# |   v                                                                   |
+# |   veth21:                                                             |
+# |      * IPv4 address: 192.0.2.22, peer 192.0.2.21/32                   |
+# |      * IPv6 address: 2001:db8::22, peer 2001:db8::21/128              |
+# |                                                                       |
+# |   bareudp_ns2:                                                        |
+# |      * Decapsulate bareudp packets (either IP or MPLS, over UDP)      |
+# |        received on veth21 and send the inner packets through veth23.  |
+# |      * Encapsulate IP or MPLS packets received on veth23 into UDP     |
+# |        and send the resulting packets through veth21.                 |
+# |                                                                       |
+# |   veth23:                                                             |
+# |   ^  * IPv4 address: 192.0.2.32, peer 192.0.2.33/32                   |
+# |   |  * IPv6 address: 2001:db8::32, peer 2001:db8::33/128              |
+# |   |                                                                   |
+# +---+-------------------------------------------------------------------+
+#     |
+#     | Traffic type: IP or MPLS (depending on test)
+#     |
+# +---+-------------------------------------------------------------------+
+# |   |                              NS3                                  |
+# |   v                                                                   |
+# |   veth32:                                                             |
+# |      * IPv4 address: 192.0.2.33, peer 192.0.2.32/32                   |
+# |      * IPv6 address: 2001:db8::33, peer 2001:db8::32/128              |
+# |                                                                       |
+# |   lo:                                                                 |
+# |      * IPv4 address: 192.0.2.103/32                                   |
+# |      * IPv6 address: 2001:db8::103/128                                |
+# |      * IPv6 address: 2001:db8::203/128                                |
+# |      * IPv4 route: 192.0.2.100/32 reachable via 192.0.2.32            |
+# |      * IPv6 route: 2001:db8::100/128 reachable via 2001:db8::32       |
+# |      * IPv6 route: 2001:db8::200/128 reachable via 2001:db8::32       |
+# |                    (encapsulated with MPLS label 200)                 |
+# |                                                                       |
+# +-----------------------------------------------------------------------+
+
+ERR=4 # Return 4 by default, which is the SKIP code for kselftest
+PING6="ping"
+PAUSE_ON_FAIL="no"
+
+readonly NS0=$(mktemp -u ns0-XXXXXXXX)
+readonly NS1=$(mktemp -u ns1-XXXXXXXX)
+readonly NS2=$(mktemp -u ns2-XXXXXXXX)
+readonly NS3=$(mktemp -u ns3-XXXXXXXX)
+
+# Exit the script after having removed the network namespaces it created
+#
+# Parameters:
+#
+#   * The list of network namespaces to delete before exiting.
+#
+exit_cleanup()
+{
+	for ns in "$@"; do
+		ip netns delete "${ns}" 2>/dev/null || true
+	done
+
+	if [ "${ERR}" -eq 4 ]; then
+		echo "Error: Setting up the testing environment failed." >&2
+	fi
+
+	exit "${ERR}"
+}
+
+# Create the four network namespaces used by the script (NS0, NS1, NS2 and NS3)
+#
+# New namespaces are cleaned up manually in case of error, to ensure that only
+# namespaces created by this script are deleted.
+create_namespaces()
+{
+	ip netns add "${NS0}" || exit_cleanup
+	ip netns add "${NS1}" || exit_cleanup "${NS0}"
+	ip netns add "${NS2}" || exit_cleanup "${NS0}" "${NS1}"
+	ip netns add "${NS3}" || exit_cleanup "${NS0}" "${NS1}" "${NS2}"
+}
+
+# The trap function handler
+#
+exit_cleanup_all()
+{
+	exit_cleanup "${NS0}" "${NS1}" "${NS2}" "${NS3}"
+}
+
+# Configure a network interface using a host route
+#
+# Parameters
+#
+#   * $1: the netns the network interface resides in,
+#   * $2: the network interface name,
+#   * $3: the local IPv4 address to assign to this interface,
+#   * $4: the IPv4 address of the remote network interface,
+#   * $5: the local IPv6 address to assign to this interface,
+#   * $6: the IPv6 address of the remote network interface.
+#
+iface_config()
+{
+	local NS="${1}"; readonly NS
+	local DEV="${2}"; readonly DEV
+	local LOCAL_IP4="${3}"; readonly LOCAL_IP4
+	local PEER_IP4="${4}"; readonly PEER_IP4
+	local LOCAL_IP6="${5}"; readonly LOCAL_IP6
+	local PEER_IP6="${6}"; readonly PEER_IP6
+
+	ip -netns "${NS}" link set dev "${DEV}" up
+	ip -netns "${NS}" address add dev "${DEV}" "${LOCAL_IP4}" peer "${PEER_IP4}"
+	ip -netns "${NS}" address add dev "${DEV}" "${LOCAL_IP6}" peer "${PEER_IP6}" nodad
+}
+
+# Create base networking topology:
+#
+#   * set up the loopback device in all network namespaces (NS0..NS3),
+#   * set up a veth pair to connect each netns in sequence (NS0 with NS1,
+#     NS1 with NS2, etc.),
+#   * add and IPv4 and an IPv6 address on each veth interface,
+#   * prepare the ingress qdiscs in the intermediate namespaces.
+#
+setup_underlay()
+{
+	for ns in "${NS0}" "${NS1}" "${NS2}" "${NS3}"; do
+		ip -netns "${ns}" link set dev lo up
+	done;
+
+	ip link add name veth01 netns "${NS0}" type veth peer name veth10 netns "${NS1}"
+	ip link add name veth12 netns "${NS1}" type veth peer name veth21 netns "${NS2}"
+	ip link add name veth23 netns "${NS2}" type veth peer name veth32 netns "${NS3}"
+	iface_config "${NS0}" veth01 192.0.2.10 192.0.2.11/32 2001:db8::10 2001:db8::11/128
+	iface_config "${NS1}" veth10 192.0.2.11 192.0.2.10/32 2001:db8::11 2001:db8::10/128
+	iface_config "${NS1}" veth12 192.0.2.21 192.0.2.22/32 2001:db8::21 2001:db8::22/128
+	iface_config "${NS2}" veth21 192.0.2.22 192.0.2.21/32 2001:db8::22 2001:db8::21/128
+	iface_config "${NS2}" veth23 192.0.2.32 192.0.2.33/32 2001:db8::32 2001:db8::33/128
+	iface_config "${NS3}" veth32 192.0.2.33 192.0.2.32/32 2001:db8::33 2001:db8::32/128
+
+	tc -netns "${NS1}" qdisc add dev veth10 ingress
+	tc -netns "${NS2}" qdisc add dev veth23 ingress
+}
+
+# Set up the IPv4, IPv6 and MPLS overlays.
+#
+# Configuration is similar for all protocols:
+#
+#   * add an overlay IP address on the loopback interface of each edge
+#     namespace,
+#   * route these IP addresses via the intermediate namespaces (for the MPLS
+#     tests, this is also where MPLS encapsulation is done),
+#   * add routes for these IP addresses (or MPLS labels) in the intermediate
+#     namespaces.
+#
+# The bareudp encapsulation isn't configured in setup_overlay_*(). That will be
+# done just before running the reachability tests.
+
+setup_overlay_ipv4()
+{
+	# Add the overlay IP addresses and route them through the veth devices
+	ip -netns "${NS0}" address add 192.0.2.100/32 dev lo
+	ip -netns "${NS3}" address add 192.0.2.103/32 dev lo
+	ip -netns "${NS0}" route add 192.0.2.103/32 src 192.0.2.100 via 192.0.2.11
+	ip -netns "${NS3}" route add 192.0.2.100/32 src 192.0.2.103 via 192.0.2.32
+
+	# Route the overlay addresses in the intermediate namespaces
+	# (used after bareudp decapsulation)
+	ip netns exec "${NS1}" sysctl -qw net.ipv4.ip_forward=1
+	ip netns exec "${NS2}" sysctl -qw net.ipv4.ip_forward=1
+	ip -netns "${NS1}" route add 192.0.2.100/32 via 192.0.2.10
+	ip -netns "${NS2}" route add 192.0.2.103/32 via 192.0.2.33
+
+	# The intermediate namespaces don't have routes for the reverse path,
+	# as it will be handled by tc. So we need to ensure that rp_filter is
+	# not going to block the traffic.
+	ip netns exec "${NS1}" sysctl -qw net.ipv4.conf.all.rp_filter=0
+	ip netns exec "${NS2}" sysctl -qw net.ipv4.conf.all.rp_filter=0
+	ip netns exec "${NS1}" sysctl -qw net.ipv4.conf.default.rp_filter=0
+	ip netns exec "${NS2}" sysctl -qw net.ipv4.conf.default.rp_filter=0
+}
+
+setup_overlay_ipv6()
+{
+	# Add the overlay IP addresses and route them through the veth devices
+	ip -netns "${NS0}" address add 2001:db8::100/128 dev lo
+	ip -netns "${NS3}" address add 2001:db8::103/128 dev lo
+	ip -netns "${NS0}" route add 2001:db8::103/128 src 2001:db8::100 via 2001:db8::11
+	ip -netns "${NS3}" route add 2001:db8::100/128 src 2001:db8::103 via 2001:db8::32
+
+	# Route the overlay addresses in the intermediate namespaces
+	# (used after bareudp decapsulation)
+	ip netns exec "${NS1}" sysctl -qw net.ipv6.conf.all.forwarding=1
+	ip netns exec "${NS2}" sysctl -qw net.ipv6.conf.all.forwarding=1
+	ip -netns "${NS1}" route add 2001:db8::100/128 via 2001:db8::10
+	ip -netns "${NS2}" route add 2001:db8::103/128 via 2001:db8::33
+}
+
+setup_overlay_mpls()
+{
+	# Add specific overlay IP addresses, routed over MPLS
+	ip -netns "${NS0}" address add 2001:db8::200/128 dev lo
+	ip -netns "${NS3}" address add 2001:db8::203/128 dev lo
+	ip -netns "${NS0}" route add 2001:db8::203/128 src 2001:db8::200 encap mpls 203 via 2001:db8::11
+	ip -netns "${NS3}" route add 2001:db8::200/128 src 2001:db8::203 encap mpls 200 via 2001:db8::32
+
+	# Route the MPLS packets in the intermediate namespaces
+	# (used after bareudp decapsulation)
+	ip netns exec "${NS1}" sysctl -qw net.mpls.platform_labels=256
+	ip netns exec "${NS2}" sysctl -qw net.mpls.platform_labels=256
+	ip -netns "${NS1}" -family mpls route add 200 via inet6 2001:db8::10
+	ip -netns "${NS2}" -family mpls route add 203 via inet6 2001:db8::33
+}
+
+# Run "ping" from NS0 and print the result
+#
+# Parameters:
+#
+#   * $1: the variant of ping to use (normally either "ping" or "ping6"),
+#   * $2: the IP address to ping,
+#   * $3: a human readable description of the purpose of the test.
+#
+# If the test fails and PAUSE_ON_FAIL is active, the user is given the
+# possibility to continue with the next test or to quit immediately.
+#
+ping_test_one()
+{
+	local PING="$1"; readonly PING
+	local IP="$2"; readonly IP
+	local MSG="$3"; readonly MSG
+	local RET
+
+	printf "TEST: %-60s  " "${MSG}"
+
+	set +e
+	ip netns exec "${NS0}" "${PING}" -w 5 -c 1 "${IP}" > /dev/null 2>&1
+	RET=$?
+	set -e
+
+	if [ "${RET}" -eq 0 ]; then
+		printf "[ OK ]\n"
+	else
+		ERR=1
+		printf "[FAIL]\n"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+			printf "\nHit enter to continue, 'q' to quit\n"
+			read a
+			if [ "$a" = "q" ]; then
+				exit 1
+			fi
+		fi
+	fi
+}
+
+# Run reachability tests
+#
+# Parameters:
+#
+#   * $1: human readable string describing the underlay protocol.
+#
+# $IPV4, $IPV6, $MPLS_UC and $MULTIPROTO are inherited from the calling
+# function.
+#
+ping_test()
+{
+	local UNDERLAY="$1"; readonly UNDERLAY
+	local MODE
+	local MSG
+
+	if [ "${MULTIPROTO}" = "multiproto" ]; then
+		MODE=" (multiproto mode)"
+	else
+		MODE=""
+	fi
+
+	if [ $IPV4 ]; then
+		ping_test_one "ping" "192.0.2.103" "IPv4 packets over ${UNDERLAY}${MODE}"
+	fi
+	if [ $IPV6 ]; then
+		ping_test_one "${PING6}" "2001:db8::103" "IPv6 packets over ${UNDERLAY}${MODE}"
+	fi
+	if [ $MPLS_UC ]; then
+		ping_test_one "${PING6}" "2001:db8::203" "Unicast MPLS packets over ${UNDERLAY}${MODE}"
+	fi
+}
+
+# Set up a bareudp overlay and run reachability tests over IPv4 and IPv6
+#
+# Parameters:
+#
+#   * $1: the packet type (protocol) to be handled by bareudp,
+#   * $2: a flag to activate or deactivate bareudp's "multiproto" mode.
+#
+test_overlay()
+{
+	local ETHERTYPE="$1"; readonly ETHERTYPE
+	local MULTIPROTO="$2"; readonly MULTIPROTO
+	local IPV4
+	local IPV6
+	local MPLS_UC
+
+	case "${ETHERTYPE}" in
+		"ipv4")
+			IPV4="ipv4"
+			if [ "${MULTIPROTO}" = "multiproto" ]; then
+				IPV6="ipv6"
+			else
+				IPV6=""
+			fi
+			MPLS_UC=""
+			;;
+		"ipv6")
+			IPV6="ipv6"
+			IPV4=""
+			MPLS_UC=""
+			;;
+		"mpls_uc")
+			MPLS_UC="mpls_uc"
+			IPV4=""
+			IPV6=""
+			;;
+		*)
+			exit 1
+			;;
+	esac
+	readonly IPV4
+	readonly IPV6
+	readonly MPLS_UC
+
+	# Create the bareudp devices in the intermediate namespaces
+	ip -netns "${NS1}" link add name bareudp_ns1 up type bareudp dstport 6635 ethertype "${ETHERTYPE}" "${MULTIPROTO}"
+	ip -netns "${NS2}" link add name bareudp_ns2 up type bareudp dstport 6635 ethertype "${ETHERTYPE}" "${MULTIPROTO}"
+
+	# IPv4 over UDPv4
+	if [ $IPV4 ]; then
+		# Encapsulation instructions for bareudp over IPv4
+		tc -netns "${NS1}" filter add dev veth10 ingress protocol ipv4         \
+			flower dst_ip 192.0.2.103/32                                   \
+			action tunnel_key set src_ip 192.0.2.21 dst_ip 192.0.2.22 id 0 \
+			action mirred egress redirect dev bareudp_ns1
+		tc -netns "${NS2}" filter add dev veth23 ingress protocol ipv4         \
+			flower dst_ip 192.0.2.100/32                                   \
+			action tunnel_key set src_ip 192.0.2.22 dst_ip 192.0.2.21 id 0 \
+			action mirred egress redirect dev bareudp_ns2
+	fi
+
+	# IPv6 over UDPv4
+	if [ $IPV6 ]; then
+		# Encapsulation instructions for bareudp over IPv4
+		tc -netns "${NS1}" filter add dev veth10 ingress protocol ipv6         \
+			flower dst_ip 2001:db8::103/128                                \
+			action tunnel_key set src_ip 192.0.2.21 dst_ip 192.0.2.22 id 0 \
+			action mirred egress redirect dev bareudp_ns1
+		tc -netns "${NS2}" filter add dev veth23 ingress protocol ipv6         \
+			flower dst_ip 2001:db8::100/128                                \
+			action tunnel_key set src_ip 192.0.2.22 dst_ip 192.0.2.21 id 0 \
+			action mirred egress redirect dev bareudp_ns2
+	fi
+
+	# MPLS (unicast) over UDPv4
+	if [ $MPLS_UC ]; then
+		ip netns exec "${NS1}" sysctl -qw net.mpls.conf.bareudp_ns1.input=1
+		ip netns exec "${NS2}" sysctl -qw net.mpls.conf.bareudp_ns2.input=1
+
+		# Encapsulation instructions for bareudp over IPv4
+		tc -netns "${NS1}" filter add dev veth10 ingress protocol mpls_uc      \
+			flower mpls_label 203                                          \
+			action tunnel_key set src_ip 192.0.2.21 dst_ip 192.0.2.22 id 0 \
+			action mirred egress redirect dev bareudp_ns1
+		tc -netns "${NS2}" filter add dev veth23 ingress protocol mpls_uc      \
+			flower mpls_label 200                                          \
+			action tunnel_key set src_ip 192.0.2.22 dst_ip 192.0.2.21 id 0 \
+			action mirred egress redirect dev bareudp_ns2
+	fi
+
+	# Test IPv4 underlay
+	ping_test "UDPv4"
+
+	# Cleanup bareudp encapsulation instructions, as they were specific to
+	# the IPv4 underlay, before setting up and testing the IPv6 underlay
+	tc -netns "${NS1}" filter delete dev veth10 ingress
+	tc -netns "${NS2}" filter delete dev veth23 ingress
+
+	# IPv4 over UDPv6
+	if [ $IPV4 ]; then
+		# New encapsulation instructions for bareudp over IPv6
+		tc -netns "${NS1}" filter add dev veth10 ingress protocol ipv4             \
+			flower dst_ip 192.0.2.103/32                                       \
+			action tunnel_key set src_ip 2001:db8::21 dst_ip 2001:db8::22 id 0 \
+			action mirred egress redirect dev bareudp_ns1
+		tc -netns "${NS2}" filter add dev veth23 ingress protocol ipv4             \
+			flower dst_ip 192.0.2.100/32                                       \
+			action tunnel_key set src_ip 2001:db8::22 dst_ip 2001:db8::21 id 0 \
+			action mirred egress redirect dev bareudp_ns2
+	fi
+
+	# IPv6 over UDPv6
+	if [ $IPV6 ]; then
+		# New encapsulation instructions for bareudp over IPv6
+		tc -netns "${NS1}" filter add dev veth10 ingress protocol ipv6             \
+			flower dst_ip 2001:db8::103/128                                    \
+			action tunnel_key set src_ip 2001:db8::21 dst_ip 2001:db8::22 id 0 \
+			action mirred egress redirect dev bareudp_ns1
+		tc -netns "${NS2}" filter add dev veth23 ingress protocol ipv6             \
+			flower dst_ip 2001:db8::100/128                                    \
+			action tunnel_key set src_ip 2001:db8::22 dst_ip 2001:db8::21 id 0 \
+			action mirred egress redirect dev bareudp_ns2
+	fi
+
+	# MPLS (unicast) over UDPv6
+	if [ $MPLS_UC ]; then
+		# New encapsulation instructions for bareudp over IPv6
+		tc -netns "${NS1}" filter add dev veth10 ingress protocol mpls_uc          \
+			flower mpls_label 203                                              \
+			action tunnel_key set src_ip 2001:db8::21 dst_ip 2001:db8::22 id 0 \
+			action mirred egress redirect dev bareudp_ns1
+		tc -netns "${NS2}" filter add dev veth23 ingress protocol mpls_uc          \
+			flower mpls_label 200                                              \
+			action tunnel_key set src_ip 2001:db8::22 dst_ip 2001:db8::21 id 0 \
+			action mirred egress redirect dev bareudp_ns2
+	fi
+
+	# Test IPv6 underlay
+	ping_test "UDPv6"
+
+	tc -netns "${NS1}" filter delete dev veth10 ingress
+	tc -netns "${NS2}" filter delete dev veth23 ingress
+	ip -netns "${NS1}" link delete bareudp_ns1
+	ip -netns "${NS2}" link delete bareudp_ns2
+}
+
+check_features()
+{
+	ip link help 2>&1 | grep -q bareudp
+	if [ $? -ne 0 ]; then
+		echo "Missing bareudp support in iproute2" >&2
+		exit_cleanup
+	fi
+
+	# Use ping6 on systems where ping doesn't handle IPv6
+	ping -w 1 -c 1 ::1 > /dev/null 2>&1 || PING6="ping6"
+}
+
+usage()
+{
+	echo "Usage: $0 [-p]"
+	exit 1
+}
+
+while getopts :p o
+do
+	case $o in
+		p) PAUSE_ON_FAIL="yes";;
+		*) usage;;
+	esac
+done
+
+check_features
+
+# Create namespaces before setting up the exit trap.
+# Otherwise, exit_cleanup_all() could delete namespaces that were not created
+# by this script.
+create_namespaces
+
+set -e
+trap exit_cleanup_all EXIT
+
+setup_underlay
+setup_overlay_ipv4
+setup_overlay_ipv6
+setup_overlay_mpls
+
+test_overlay ipv4 nomultiproto
+test_overlay ipv6 nomultiproto
+test_overlay ipv4 multiproto
+test_overlay mpls_uc nomultiproto
+
+if [ "${ERR}" -eq 1 ]; then
+	echo "Some tests failed." >&2
+else
+	ERR=0
+fi
diff --git a/tools/testing/selftests/net/big_tcp.sh b/tools/testing/selftests/net/big_tcp.sh
new file mode 100755
index 000000000000..2db9d15cd45f
--- /dev/null
+++ b/tools/testing/selftests/net/big_tcp.sh
@@ -0,0 +1,182 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Testing For IPv4 and IPv6 BIG TCP.
+# TOPO: CLIENT_NS (link0)<--->(link1) ROUTER_NS (link2)<--->(link3) SERVER_NS
+
+CLIENT_NS=$(mktemp -u client-XXXXXXXX)
+CLIENT_IP4="198.51.100.1"
+CLIENT_IP6="2001:db8:1::1"
+
+SERVER_NS=$(mktemp -u server-XXXXXXXX)
+SERVER_IP4="203.0.113.1"
+SERVER_IP6="2001:db8:2::1"
+
+ROUTER_NS=$(mktemp -u router-XXXXXXXX)
+SERVER_GW4="203.0.113.2"
+CLIENT_GW4="198.51.100.2"
+SERVER_GW6="2001:db8:2::2"
+CLIENT_GW6="2001:db8:1::2"
+
+MAX_SIZE=128000
+CHK_SIZE=65535
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+setup() {
+	ip netns add $CLIENT_NS
+	ip netns add $SERVER_NS
+	ip netns add $ROUTER_NS
+	ip -net $ROUTER_NS link add link1 type veth peer name link0 netns $CLIENT_NS
+	ip -net $ROUTER_NS link add link2 type veth peer name link3 netns $SERVER_NS
+
+	ip -net $CLIENT_NS link set link0 up
+	ip -net $CLIENT_NS link set link0 mtu 1442
+	ip -net $CLIENT_NS addr add $CLIENT_IP4/24 dev link0
+	ip -net $CLIENT_NS addr add $CLIENT_IP6/64 dev link0 nodad
+	ip -net $CLIENT_NS route add $SERVER_IP4 dev link0 via $CLIENT_GW4
+	ip -net $CLIENT_NS route add $SERVER_IP6 dev link0 via $CLIENT_GW6
+	ip -net $CLIENT_NS link set dev link0 \
+		gro_ipv4_max_size $MAX_SIZE gso_ipv4_max_size $MAX_SIZE
+	ip -net $CLIENT_NS link set dev link0 \
+		gro_max_size $MAX_SIZE gso_max_size $MAX_SIZE
+	ip net exec $CLIENT_NS sysctl -wq net.ipv4.tcp_window_scaling=10
+
+	ip -net $ROUTER_NS link set link1 up
+	ip -net $ROUTER_NS link set link2 up
+	ip -net $ROUTER_NS addr add $CLIENT_GW4/24 dev link1
+	ip -net $ROUTER_NS addr add $CLIENT_GW6/64 dev link1 nodad
+	ip -net $ROUTER_NS addr add $SERVER_GW4/24 dev link2
+	ip -net $ROUTER_NS addr add $SERVER_GW6/64 dev link2 nodad
+	ip -net $ROUTER_NS link set dev link1 \
+		gro_ipv4_max_size $MAX_SIZE gso_ipv4_max_size $MAX_SIZE
+	ip -net $ROUTER_NS link set dev link2 \
+		gro_ipv4_max_size $MAX_SIZE gso_ipv4_max_size $MAX_SIZE
+	ip -net $ROUTER_NS link set dev link1 \
+		gro_max_size $MAX_SIZE gso_max_size $MAX_SIZE
+	ip -net $ROUTER_NS link set dev link2 \
+		gro_max_size $MAX_SIZE gso_max_size $MAX_SIZE
+	# test for nf_ct_skb_network_trim in nf_conntrack_ovs used by TC ct action.
+	ip net exec $ROUTER_NS tc qdisc add dev link1 ingress
+	ip net exec $ROUTER_NS tc filter add dev link1 ingress \
+		proto ip flower ip_proto tcp action ct
+	ip net exec $ROUTER_NS tc filter add dev link1 ingress \
+		proto ipv6 flower ip_proto tcp action ct
+	ip net exec $ROUTER_NS sysctl -wq net.ipv4.ip_forward=1
+	ip net exec $ROUTER_NS sysctl -wq net.ipv6.conf.all.forwarding=1
+
+	ip -net $SERVER_NS link set link3 up
+	ip -net $SERVER_NS addr add $SERVER_IP4/24 dev link3
+	ip -net $SERVER_NS addr add $SERVER_IP6/64 dev link3 nodad
+	ip -net $SERVER_NS route add $CLIENT_IP4 dev link3 via $SERVER_GW4
+	ip -net $SERVER_NS route add $CLIENT_IP6 dev link3 via $SERVER_GW6
+	ip -net $SERVER_NS link set dev link3 \
+		gro_ipv4_max_size $MAX_SIZE gso_ipv4_max_size $MAX_SIZE
+	ip -net $SERVER_NS link set dev link3 \
+		gro_max_size $MAX_SIZE gso_max_size $MAX_SIZE
+	ip net exec $SERVER_NS sysctl -wq net.ipv4.tcp_window_scaling=10
+	ip net exec $SERVER_NS netserver 2>&1 >/dev/null
+}
+
+cleanup() {
+	ip net exec $SERVER_NS pkill netserver
+	ip -net $ROUTER_NS link del link1
+	ip -net $ROUTER_NS link del link2
+	ip netns del "$CLIENT_NS"
+	ip netns del "$SERVER_NS"
+	ip netns del "$ROUTER_NS"
+}
+
+start_counter() {
+	local ipt="iptables"
+	local iface=$1
+	local netns=$2
+
+	[ "$NF" = "6" ] && ipt="ip6tables"
+	ip net exec $netns $ipt -t raw -A PREROUTING -i $iface \
+		-m length ! --length 0:$CHK_SIZE -j ACCEPT
+}
+
+check_counter() {
+	local ipt="iptables"
+	local iface=$1
+	local netns=$2
+
+	[ "$NF" = "6" ] && ipt="ip6tables"
+	test `ip net exec $netns $ipt -t raw -L -v |grep $iface | awk '{print $1}'` != "0"
+}
+
+stop_counter() {
+	local ipt="iptables"
+	local iface=$1
+	local netns=$2
+
+	[ "$NF" = "6" ] && ipt="ip6tables"
+	ip net exec $netns $ipt -t raw -D PREROUTING -i $iface \
+		-m length ! --length 0:$CHK_SIZE -j ACCEPT
+}
+
+do_netperf() {
+	local serip=$SERVER_IP4
+	local netns=$1
+
+	[ "$NF" = "6" ] && serip=$SERVER_IP6
+
+	# use large write to be sure to generate big tcp packets
+	ip net exec $netns netperf -$NF -t TCP_STREAM -l 1 -H $serip -- -m 262144 2>&1 >/dev/null
+}
+
+do_test() {
+	local cli_tso=$1
+	local gw_gro=$2
+	local gw_tso=$3
+	local ser_gro=$4
+	local ret="PASS"
+
+	ip net exec $CLIENT_NS ethtool -K link0 tso $cli_tso
+	ip net exec $ROUTER_NS ethtool -K link1 gro $gw_gro
+	ip net exec $ROUTER_NS ethtool -K link2 tso $gw_tso
+	ip net exec $SERVER_NS ethtool -K link3 gro $ser_gro
+
+	start_counter link1 $ROUTER_NS
+	start_counter link3 $SERVER_NS
+	do_netperf $CLIENT_NS
+
+	if check_counter link1 $ROUTER_NS; then
+		check_counter link3 $SERVER_NS || ret="FAIL_on_link3"
+	else
+		ret="FAIL_on_link1"
+	fi
+
+	stop_counter link1 $ROUTER_NS
+	stop_counter link3 $SERVER_NS
+	printf "%-9s %-8s %-8s %-8s: [%s]\n" \
+		$cli_tso $gw_gro $gw_tso $ser_gro $ret
+	test $ret = "PASS"
+}
+
+testup() {
+	echo "CLI GSO | GW GRO | GW GSO | SER GRO" && \
+	do_test "on"  "on"  "on"  "on"  && \
+	do_test "on"  "off" "on"  "off" && \
+	do_test "off" "on"  "on"  "on"  && \
+	do_test "on"  "on"  "off" "on"  && \
+	do_test "off" "on"  "off" "on"
+}
+
+if ! netperf -V &> /dev/null; then
+	echo "SKIP: Could not run test without netperf tool"
+	exit $ksft_skip
+fi
+
+if ! ip link help 2>&1 | grep gso_ipv4_max_size &> /dev/null; then
+	echo "SKIP: Could not run test without gso/gro_ipv4_max_size supported in ip-link"
+	exit $ksft_skip
+fi
+
+trap cleanup EXIT
+setup && echo "Testing for BIG TCP:" && \
+NF=4 testup && echo "***v4 Tests Done***" && \
+NF=6 testup && echo "***v6 Tests Done***"
+exit $?
diff --git a/tools/testing/selftests/net/bind_bhash.c b/tools/testing/selftests/net/bind_bhash.c
new file mode 100644
index 000000000000..57ff67a3751e
--- /dev/null
+++ b/tools/testing/selftests/net/bind_bhash.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This times how long it takes to bind to a port when the port already
+ * has multiple sockets in its bhash table.
+ *
+ * In the setup(), we populate the port's bhash table with
+ * MAX_THREADS * MAX_CONNECTIONS number of entries.
+ */
+
+#include <unistd.h>
+#include <stdio.h>
+#include <netdb.h>
+#include <pthread.h>
+#include <string.h>
+#include <stdbool.h>
+
+#define MAX_THREADS 600
+#define MAX_CONNECTIONS 40
+
+static const char *setup_addr_v6 = "::1";
+static const char *setup_addr_v4 = "127.0.0.1";
+static const char *setup_addr;
+static const char *bind_addr;
+static const char *port;
+bool use_v6;
+int ret;
+
+static int fd_array[MAX_THREADS][MAX_CONNECTIONS];
+
+static int bind_socket(int opt, const char *addr)
+{
+	struct addrinfo *res, hint = {};
+	int sock_fd, reuse = 1, err;
+	int domain = use_v6 ? AF_INET6 : AF_INET;
+
+	sock_fd = socket(domain, SOCK_STREAM, 0);
+	if (sock_fd < 0) {
+		perror("socket fd err");
+		return sock_fd;
+	}
+
+	hint.ai_family = domain;
+	hint.ai_socktype = SOCK_STREAM;
+
+	err = getaddrinfo(addr, port, &hint, &res);
+	if (err) {
+		perror("getaddrinfo failed");
+		goto cleanup;
+	}
+
+	if (opt) {
+		err = setsockopt(sock_fd, SOL_SOCKET, opt, &reuse, sizeof(reuse));
+		if (err) {
+			perror("setsockopt failed");
+			goto cleanup;
+		}
+	}
+
+	err = bind(sock_fd, res->ai_addr, res->ai_addrlen);
+	if (err) {
+		perror("failed to bind to port");
+		goto cleanup;
+	}
+
+	return sock_fd;
+
+cleanup:
+	close(sock_fd);
+	return err;
+}
+
+static void *setup(void *arg)
+{
+	int sock_fd, i;
+	int *array = (int *)arg;
+
+	for (i = 0; i < MAX_CONNECTIONS; i++) {
+		sock_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, setup_addr);
+		if (sock_fd < 0) {
+			ret = sock_fd;
+			pthread_exit(&ret);
+		}
+		array[i] = sock_fd;
+	}
+
+	return NULL;
+}
+
+int main(int argc, const char *argv[])
+{
+	int listener_fd, sock_fd, i, j;
+	pthread_t tid[MAX_THREADS];
+	clock_t begin, end;
+
+	if (argc != 4) {
+		printf("Usage: listener <port> <ipv6 | ipv4> <bind-addr>\n");
+		return -1;
+	}
+
+	port = argv[1];
+	use_v6 = strcmp(argv[2], "ipv6") == 0;
+	bind_addr = argv[3];
+
+	setup_addr = use_v6 ? setup_addr_v6 : setup_addr_v4;
+
+	listener_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, setup_addr);
+	if (listen(listener_fd, 100) < 0) {
+		perror("listen failed");
+		return -1;
+	}
+
+	/* Set up threads to populate the bhash table entry for the port */
+	for (i = 0; i < MAX_THREADS; i++)
+		pthread_create(&tid[i], NULL, setup, fd_array[i]);
+
+	for (i = 0; i < MAX_THREADS; i++)
+		pthread_join(tid[i], NULL);
+
+	if (ret)
+		goto done;
+
+	begin = clock();
+
+	/* Bind to the same port on a different address */
+	sock_fd  = bind_socket(0, bind_addr);
+	if (sock_fd < 0)
+		goto done;
+
+	end = clock();
+
+	printf("time spent = %f\n", (double)(end - begin) / CLOCKS_PER_SEC);
+
+	/* clean up */
+	close(sock_fd);
+
+done:
+	close(listener_fd);
+	for (i = 0; i < MAX_THREADS; i++) {
+		for (j = 0; i < MAX_THREADS; i++)
+			close(fd_array[i][j]);
+	}
+
+	return 0;
+}
diff --git a/tools/testing/selftests/net/bind_bhash.sh b/tools/testing/selftests/net/bind_bhash.sh
new file mode 100755
index 000000000000..a28563bdaae0
--- /dev/null
+++ b/tools/testing/selftests/net/bind_bhash.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NR_FILES=32768
+readonly NETNS="ns-$(mktemp -u XXXXXX)"
+
+# default values
+port=443
+addr_v6="2001:0db8:0:f101::1"
+addr_v4="10.8.8.8"
+use_v6=true
+addr=""
+
+usage() {
+    echo "Usage: $0 [-6 | -4] [-p port] [-a address]"
+    echo -e "\t6: use ipv6"
+    echo -e "\t4: use ipv4"
+    echo -e "\tport: Port number"
+    echo -e "\taddress: ip address"
+}
+
+while getopts "ha:p:64" opt; do
+    case ${opt} in
+	h)
+	    usage $0
+	    exit 0
+	    ;;
+	a)  addr=$OPTARG;;
+	p)
+	    port=$OPTARG;;
+	6)
+	    use_v6=true;;
+	4)
+	    use_v6=false;;
+    esac
+done
+
+setup() {
+    ip netns add "${NETNS}"
+    ip -netns "${NETNS}" link add veth0 type veth peer name veth1
+    ip -netns "${NETNS}" link set lo up
+    ip -netns "${NETNS}" link set veth0 up
+    ip -netns "${NETNS}" link set veth1 up
+
+    if [[ "$use_v6" == true ]]; then
+        ip -netns "${NETNS}" addr add $addr_v6 nodad dev veth0
+    else
+        ip -netns "${NETNS}" addr add $addr_v4 dev lo
+    fi
+}
+
+cleanup() {
+    ip netns del "${NETNS}"
+}
+
+if [[ "$addr" != "" ]]; then
+    addr_v4=$addr;
+    addr_v6=$addr;
+fi
+setup
+if [[ "$use_v6" == true ]] ; then
+    ip netns exec "${NETNS}" sh -c \
+        "ulimit -n ${NR_FILES};./bind_bhash ${port} ipv6 ${addr_v6}"
+else
+    ip netns exec "${NETNS}" sh -c \
+        "ulimit -n ${NR_FILES};./bind_bhash ${port} ipv4 ${addr_v4}"
+fi
+cleanup
diff --git a/tools/testing/selftests/net/bind_timewait.c b/tools/testing/selftests/net/bind_timewait.c
new file mode 100644
index 000000000000..cb9fdf51ea59
--- /dev/null
+++ b/tools/testing/selftests/net/bind_timewait.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+#include "../kselftest_harness.h"
+
+FIXTURE(bind_timewait)
+{
+	struct sockaddr_in addr;
+	socklen_t addrlen;
+};
+
+FIXTURE_VARIANT(bind_timewait)
+{
+	__u32 addr_const;
+};
+
+FIXTURE_VARIANT_ADD(bind_timewait, localhost)
+{
+	.addr_const = INADDR_LOOPBACK
+};
+
+FIXTURE_VARIANT_ADD(bind_timewait, addrany)
+{
+	.addr_const = INADDR_ANY
+};
+
+FIXTURE_SETUP(bind_timewait)
+{
+	self->addr.sin_family = AF_INET;
+	self->addr.sin_port = 0;
+	self->addr.sin_addr.s_addr = htonl(variant->addr_const);
+	self->addrlen = sizeof(self->addr);
+}
+
+FIXTURE_TEARDOWN(bind_timewait)
+{
+}
+
+void create_timewait_socket(struct __test_metadata *_metadata,
+			    FIXTURE_DATA(bind_timewait) *self)
+{
+	int server_fd, client_fd, child_fd, ret;
+	struct sockaddr_in addr;
+	socklen_t addrlen;
+
+	server_fd = socket(AF_INET, SOCK_STREAM, 0);
+	ASSERT_GT(server_fd, 0);
+
+	ret = bind(server_fd, (struct sockaddr *)&self->addr, self->addrlen);
+	ASSERT_EQ(ret, 0);
+
+	ret = listen(server_fd, 1);
+	ASSERT_EQ(ret, 0);
+
+	ret = getsockname(server_fd, (struct sockaddr *)&self->addr, &self->addrlen);
+	ASSERT_EQ(ret, 0);
+
+	client_fd = socket(AF_INET, SOCK_STREAM, 0);
+	ASSERT_GT(client_fd, 0);
+
+	ret = connect(client_fd, (struct sockaddr *)&self->addr, self->addrlen);
+	ASSERT_EQ(ret, 0);
+
+	addrlen = sizeof(addr);
+	child_fd = accept(server_fd, (struct sockaddr *)&addr, &addrlen);
+	ASSERT_GT(child_fd, 0);
+
+	close(child_fd);
+	close(client_fd);
+	close(server_fd);
+}
+
+TEST_F(bind_timewait, 1)
+{
+	int fd, ret;
+
+	create_timewait_socket(_metadata, self);
+
+	fd = socket(AF_INET, SOCK_STREAM, 0);
+	ASSERT_GT(fd, 0);
+
+	ret = bind(fd, (struct sockaddr *)&self->addr, self->addrlen);
+	ASSERT_EQ(ret, -1);
+	ASSERT_EQ(errno, EADDRINUSE);
+
+	close(fd);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/bind_wildcard.c b/tools/testing/selftests/net/bind_wildcard.c
new file mode 100644
index 000000000000..a2662348cdb1
--- /dev/null
+++ b/tools/testing/selftests/net/bind_wildcard.c
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+#include "../kselftest_harness.h"
+
+struct in6_addr in6addr_v4mapped_any = {
+	.s6_addr = {
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		0, 0, 255, 255,
+		0, 0, 0, 0
+	}
+};
+
+struct in6_addr in6addr_v4mapped_loopback = {
+	.s6_addr = {
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+		0, 0, 255, 255,
+		127, 0, 0, 1
+	}
+};
+
+FIXTURE(bind_wildcard)
+{
+	struct sockaddr_in addr4;
+	struct sockaddr_in6 addr6;
+};
+
+FIXTURE_VARIANT(bind_wildcard)
+{
+	const __u32 addr4_const;
+	const struct in6_addr *addr6_const;
+	int expected_errno;
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_any)
+{
+	.addr4_const = INADDR_ANY,
+	.addr6_const = &in6addr_any,
+	.expected_errno = EADDRINUSE,
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_local)
+{
+	.addr4_const = INADDR_ANY,
+	.addr6_const = &in6addr_loopback,
+	.expected_errno = 0,
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_v4mapped_any)
+{
+	.addr4_const = INADDR_ANY,
+	.addr6_const = &in6addr_v4mapped_any,
+	.expected_errno = EADDRINUSE,
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_v4mapped_local)
+{
+	.addr4_const = INADDR_ANY,
+	.addr6_const = &in6addr_v4mapped_loopback,
+	.expected_errno = EADDRINUSE,
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_any)
+{
+	.addr4_const = INADDR_LOOPBACK,
+	.addr6_const = &in6addr_any,
+	.expected_errno = EADDRINUSE,
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_local)
+{
+	.addr4_const = INADDR_LOOPBACK,
+	.addr6_const = &in6addr_loopback,
+	.expected_errno = 0,
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_v4mapped_any)
+{
+	.addr4_const = INADDR_LOOPBACK,
+	.addr6_const = &in6addr_v4mapped_any,
+	.expected_errno = EADDRINUSE,
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_v4mapped_local)
+{
+	.addr4_const = INADDR_LOOPBACK,
+	.addr6_const = &in6addr_v4mapped_loopback,
+	.expected_errno = EADDRINUSE,
+};
+
+FIXTURE_SETUP(bind_wildcard)
+{
+	self->addr4.sin_family = AF_INET;
+	self->addr4.sin_port = htons(0);
+	self->addr4.sin_addr.s_addr = htonl(variant->addr4_const);
+
+	self->addr6.sin6_family = AF_INET6;
+	self->addr6.sin6_port = htons(0);
+	self->addr6.sin6_addr = *variant->addr6_const;
+}
+
+FIXTURE_TEARDOWN(bind_wildcard)
+{
+}
+
+void bind_sockets(struct __test_metadata *_metadata,
+		  FIXTURE_DATA(bind_wildcard) *self,
+		  int expected_errno,
+		  struct sockaddr *addr1, socklen_t addrlen1,
+		  struct sockaddr *addr2, socklen_t addrlen2)
+{
+	int fd[2];
+	int ret;
+
+	fd[0] = socket(addr1->sa_family, SOCK_STREAM, 0);
+	ASSERT_GT(fd[0], 0);
+
+	ret = bind(fd[0], addr1, addrlen1);
+	ASSERT_EQ(ret, 0);
+
+	ret = getsockname(fd[0], addr1, &addrlen1);
+	ASSERT_EQ(ret, 0);
+
+	((struct sockaddr_in *)addr2)->sin_port = ((struct sockaddr_in *)addr1)->sin_port;
+
+	fd[1] = socket(addr2->sa_family, SOCK_STREAM, 0);
+	ASSERT_GT(fd[1], 0);
+
+	ret = bind(fd[1], addr2, addrlen2);
+	if (expected_errno) {
+		ASSERT_EQ(ret, -1);
+		ASSERT_EQ(errno, expected_errno);
+	} else {
+		ASSERT_EQ(ret, 0);
+	}
+
+	close(fd[1]);
+	close(fd[0]);
+}
+
+TEST_F(bind_wildcard, v4_v6)
+{
+	bind_sockets(_metadata, self, variant->expected_errno,
+		     (struct sockaddr *)&self->addr4, sizeof(self->addr4),
+		     (struct sockaddr *)&self->addr6, sizeof(self->addr6));
+}
+
+TEST_F(bind_wildcard, v6_v4)
+{
+	bind_sockets(_metadata, self, variant->expected_errno,
+		     (struct sockaddr *)&self->addr6, sizeof(self->addr6),
+		     (struct sockaddr *)&self->addr4, sizeof(self->addr4));
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/cmsg_ipv6.sh b/tools/testing/selftests/net/cmsg_ipv6.sh
new file mode 100755
index 000000000000..8bc23fb4c82b
--- /dev/null
+++ b/tools/testing/selftests/net/cmsg_ipv6.sh
@@ -0,0 +1,154 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+IP6=2001:db8:1::1/64
+TGT6=2001:db8:1::2
+TMPF=$(mktemp --suffix ".pcap")
+
+cleanup()
+{
+    rm -f $TMPF
+    cleanup_ns $NS
+}
+
+trap cleanup EXIT
+
+tcpdump -h | grep immediate-mode >> /dev/null
+if [ $? -ne 0 ]; then
+    echo "SKIP - tcpdump with --immediate-mode option required"
+    exit $ksft_skip
+fi
+
+# Namespaces
+setup_ns NS
+NSEXE="ip netns exec $NS"
+
+$NSEXE sysctl -w net.ipv4.ping_group_range='0 2147483647' > /dev/null
+
+# Connectivity
+ip -netns $NS link add type dummy
+ip -netns $NS link set dev dummy0 up
+ip -netns $NS addr add $IP6 dev dummy0
+
+# Test
+BAD=0
+TOTAL=0
+
+check_result() {
+    ((TOTAL++))
+    if [ $1 -ne $2 ]; then
+	echo "  Case $3 returned $1, expected $2"
+	((BAD++))
+    fi
+}
+
+# IPV6_DONTFRAG
+for ovr in setsock cmsg both diff; do
+    for df in 0 1; do
+	for p in u i r; do
+	    [ $p == "u" ] && prot=UDP
+	    [ $p == "i" ] && prot=ICMP
+	    [ $p == "r" ] && prot=RAW
+
+	    [ $ovr == "setsock" ] && m="-F $df"
+	    [ $ovr == "cmsg" ]    && m="-f $df"
+	    [ $ovr == "both" ]    && m="-F $df -f $df"
+	    [ $ovr == "diff" ]    && m="-F $((1 - df)) -f $df"
+
+	    $NSEXE ./cmsg_sender -s -S 2000 -6 -p $p $m $TGT6 1234
+	    check_result $? $df "DONTFRAG $prot $ovr"
+	done
+    done
+done
+
+# IPV6_TCLASS
+TOS=0x10
+TOS2=0x20
+
+ip -6 -netns $NS rule add tos $TOS lookup 300
+ip -6 -netns $NS route add table 300 prohibit any
+
+for ovr in setsock cmsg both diff; do
+    for p in u i r; do
+	[ $p == "u" ] && prot=UDP
+	[ $p == "i" ] && prot=ICMP
+	[ $p == "r" ] && prot=RAW
+
+	[ $ovr == "setsock" ] && m="-C"
+	[ $ovr == "cmsg" ]    && m="-c"
+	[ $ovr == "both" ]    && m="-C $((TOS2)) -c"
+	[ $ovr == "diff" ]    && m="-C $((TOS )) -c"
+
+	$NSEXE nohup tcpdump --immediate-mode -p -ni dummy0 -w $TMPF -c 4 2> /dev/null &
+	BG=$!
+	sleep 0.05
+
+	$NSEXE ./cmsg_sender -6 -p $p $m $((TOS2)) $TGT6 1234
+	check_result $? 0 "TCLASS $prot $ovr - pass"
+
+	while [ -d /proc/$BG ]; do
+	    $NSEXE ./cmsg_sender -6 -p $p $m $((TOS2)) $TGT6 1234
+	done
+
+	tcpdump -r $TMPF -v 2>&1 | grep "class $TOS2" >> /dev/null
+	check_result $? 0 "TCLASS $prot $ovr - packet data"
+	rm $TMPF
+
+	[ $ovr == "both" ]    && m="-C $((TOS )) -c"
+	[ $ovr == "diff" ]    && m="-C $((TOS2)) -c"
+
+	$NSEXE ./cmsg_sender -6 -p $p $m $((TOS)) -s $TGT6 1234
+	check_result $? 1 "TCLASS $prot $ovr - rejection"
+    done
+done
+
+# IPV6_HOPLIMIT
+LIM=4
+
+for ovr in setsock cmsg both diff; do
+    for p in u i r; do
+	[ $p == "u" ] && prot=UDP
+	[ $p == "i" ] && prot=ICMP
+	[ $p == "r" ] && prot=RAW
+
+	[ $ovr == "setsock" ] && m="-L"
+	[ $ovr == "cmsg" ]    && m="-l"
+	[ $ovr == "both" ]    && m="-L $LIM -l"
+	[ $ovr == "diff" ]    && m="-L $((LIM + 1)) -l"
+
+	$NSEXE nohup tcpdump --immediate-mode -p -ni dummy0 -w $TMPF -c 4 2> /dev/null &
+	BG=$!
+	sleep 0.05
+
+	$NSEXE ./cmsg_sender -6 -p $p $m $LIM $TGT6 1234
+	check_result $? 0 "HOPLIMIT $prot $ovr - pass"
+
+	while [ -d /proc/$BG ]; do
+	    $NSEXE ./cmsg_sender -6 -p $p $m $LIM $TGT6 1234
+	done
+
+	tcpdump -r $TMPF -v 2>&1 | grep "hlim $LIM[^0-9]" >> /dev/null
+	check_result $? 0 "HOPLIMIT $prot $ovr - packet data"
+	rm $TMPF
+    done
+done
+
+# IPV6 exthdr
+for p in u i r; do
+    # Very basic "does it crash" test
+    for h in h d r; do
+	$NSEXE ./cmsg_sender -p $p -6 -H $h $TGT6 1234
+	check_result $? 0 "ExtHdr $prot $ovr - pass"
+    done
+done
+
+# Summary
+if [ $BAD -ne 0 ]; then
+    echo "FAIL - $BAD/$TOTAL cases failed"
+    exit 1
+else
+    echo "OK"
+    exit 0
+fi
diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c
new file mode 100644
index 000000000000..c79e65581dc3
--- /dev/null
+++ b/tools/testing/selftests/net/cmsg_sender.c
@@ -0,0 +1,522 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <errno.h>
+#include <error.h>
+#include <netdb.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include <linux/errqueue.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/net_tstamp.h>
+#include <linux/types.h>
+#include <linux/udp.h>
+#include <sys/socket.h>
+
+#include "../kselftest.h"
+
+enum {
+	ERN_SUCCESS = 0,
+	/* Well defined errors, callers may depend on these */
+	ERN_SEND = 1,
+	/* Informational, can reorder */
+	ERN_HELP,
+	ERN_SEND_SHORT,
+	ERN_SOCK_CREATE,
+	ERN_RESOLVE,
+	ERN_CMSG_WR,
+	ERN_SOCKOPT,
+	ERN_GETTIME,
+	ERN_RECVERR,
+	ERN_CMSG_RD,
+	ERN_CMSG_RCV,
+};
+
+struct option_cmsg_u32 {
+	bool ena;
+	unsigned int val;
+};
+
+struct options {
+	bool silent_send;
+	const char *host;
+	const char *service;
+	unsigned int size;
+	unsigned int num_pkt;
+	struct {
+		unsigned int mark;
+		unsigned int dontfrag;
+		unsigned int tclass;
+		unsigned int hlimit;
+		unsigned int priority;
+	} sockopt;
+	struct {
+		unsigned int family;
+		unsigned int type;
+		unsigned int proto;
+	} sock;
+	struct option_cmsg_u32 mark;
+	struct {
+		bool ena;
+		unsigned int delay;
+	} txtime;
+	struct {
+		bool ena;
+	} ts;
+	struct {
+		struct option_cmsg_u32 dontfrag;
+		struct option_cmsg_u32 tclass;
+		struct option_cmsg_u32 hlimit;
+		struct option_cmsg_u32 exthdr;
+	} v6;
+} opt = {
+	.size = 13,
+	.num_pkt = 1,
+	.sock = {
+		.family	= AF_UNSPEC,
+		.type	= SOCK_DGRAM,
+		.proto	= IPPROTO_UDP,
+	},
+};
+
+static struct timespec time_start_real;
+static struct timespec time_start_mono;
+
+static void __attribute__((noreturn)) cs_usage(const char *bin)
+{
+	printf("Usage: %s [opts] <dst host> <dst port / service>\n", bin);
+	printf("Options:\n"
+	       "\t\t-s      Silent send() failures\n"
+	       "\t\t-S      send() size\n"
+	       "\t\t-4/-6   Force IPv4 / IPv6 only\n"
+	       "\t\t-p prot Socket protocol\n"
+	       "\t\t        (u = UDP (default); i = ICMP; r = RAW)\n"
+	       "\n"
+	       "\t\t-m val  Set SO_MARK with given value\n"
+	       "\t\t-M val  Set SO_MARK via setsockopt\n"
+	       "\t\t-d val  Set SO_TXTIME with given delay (usec)\n"
+	       "\t\t-t      Enable time stamp reporting\n"
+	       "\t\t-f val  Set don't fragment via cmsg\n"
+	       "\t\t-F val  Set don't fragment via setsockopt\n"
+	       "\t\t-c val  Set TCLASS via cmsg\n"
+	       "\t\t-C val  Set TCLASS via setsockopt\n"
+	       "\t\t-l val  Set HOPLIMIT via cmsg\n"
+	       "\t\t-L val  Set HOPLIMIT via setsockopt\n"
+	       "\t\t-H type Add an IPv6 header option\n"
+	       "\t\t        (h = HOP; d = DST; r = RTDST)"
+	       "");
+	exit(ERN_HELP);
+}
+
+static void cs_parse_args(int argc, char *argv[])
+{
+	int o;
+
+	while ((o = getopt(argc, argv, "46sS:p:P:m:M:n:d:tf:F:c:C:l:L:H:")) != -1) {
+		switch (o) {
+		case 's':
+			opt.silent_send = true;
+			break;
+		case 'S':
+			opt.size = atoi(optarg);
+			break;
+		case '4':
+			opt.sock.family = AF_INET;
+			break;
+		case '6':
+			opt.sock.family = AF_INET6;
+			break;
+		case 'p':
+			if (*optarg == 'u' || *optarg == 'U') {
+				opt.sock.proto = IPPROTO_UDP;
+			} else if (*optarg == 'i' || *optarg == 'I') {
+				opt.sock.proto = IPPROTO_ICMP;
+			} else if (*optarg == 'r') {
+				opt.sock.type = SOCK_RAW;
+			} else {
+				printf("Error: unknown protocol: %s\n", optarg);
+				cs_usage(argv[0]);
+			}
+			break;
+		case 'P':
+			opt.sockopt.priority = atoi(optarg);
+			break;
+		case 'm':
+			opt.mark.ena = true;
+			opt.mark.val = atoi(optarg);
+			break;
+		case 'M':
+			opt.sockopt.mark = atoi(optarg);
+			break;
+		case 'n':
+			opt.num_pkt = atoi(optarg);
+			break;
+		case 'd':
+			opt.txtime.ena = true;
+			opt.txtime.delay = atoi(optarg);
+			break;
+		case 't':
+			opt.ts.ena = true;
+			break;
+		case 'f':
+			opt.v6.dontfrag.ena = true;
+			opt.v6.dontfrag.val = atoi(optarg);
+			break;
+		case 'F':
+			opt.sockopt.dontfrag = atoi(optarg);
+			break;
+		case 'c':
+			opt.v6.tclass.ena = true;
+			opt.v6.tclass.val = atoi(optarg);
+			break;
+		case 'C':
+			opt.sockopt.tclass = atoi(optarg);
+			break;
+		case 'l':
+			opt.v6.hlimit.ena = true;
+			opt.v6.hlimit.val = atoi(optarg);
+			break;
+		case 'L':
+			opt.sockopt.hlimit = atoi(optarg);
+			break;
+		case 'H':
+			opt.v6.exthdr.ena = true;
+			switch (optarg[0]) {
+			case 'h':
+				opt.v6.exthdr.val = IPV6_HOPOPTS;
+				break;
+			case 'd':
+				opt.v6.exthdr.val = IPV6_DSTOPTS;
+				break;
+			case 'r':
+				opt.v6.exthdr.val = IPV6_RTHDRDSTOPTS;
+				break;
+			default:
+				printf("Error: hdr type: %s\n", optarg);
+				break;
+			}
+			break;
+		}
+	}
+
+	if (optind != argc - 2)
+		cs_usage(argv[0]);
+
+	opt.host = argv[optind];
+	opt.service = argv[optind + 1];
+}
+
+static void memrnd(void *s, size_t n)
+{
+	int *dword = s;
+	char *byte;
+
+	for (; n >= 4; n -= 4)
+		*dword++ = rand();
+	byte = (void *)dword;
+	while (n--)
+		*byte++ = rand();
+}
+
+static void
+ca_write_cmsg_u32(char *cbuf, size_t cbuf_sz, size_t *cmsg_len,
+		  int level, int optname, struct option_cmsg_u32 *uopt)
+{
+	struct cmsghdr *cmsg;
+
+	if (!uopt->ena)
+		return;
+
+	cmsg = (struct cmsghdr *)(cbuf + *cmsg_len);
+	*cmsg_len += CMSG_SPACE(sizeof(__u32));
+	if (cbuf_sz < *cmsg_len)
+		error(ERN_CMSG_WR, EFAULT, "cmsg buffer too small");
+
+	cmsg->cmsg_level = level;
+	cmsg->cmsg_type = optname;
+	cmsg->cmsg_len = CMSG_LEN(sizeof(__u32));
+	*(__u32 *)CMSG_DATA(cmsg) = uopt->val;
+}
+
+static void
+cs_write_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz)
+{
+	struct cmsghdr *cmsg;
+	size_t cmsg_len;
+
+	msg->msg_control = cbuf;
+	cmsg_len = 0;
+
+	ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
+			  SOL_SOCKET, SO_MARK, &opt.mark);
+	ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
+			  SOL_IPV6, IPV6_DONTFRAG, &opt.v6.dontfrag);
+	ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
+			  SOL_IPV6, IPV6_TCLASS, &opt.v6.tclass);
+	ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
+			  SOL_IPV6, IPV6_HOPLIMIT, &opt.v6.hlimit);
+
+	if (opt.txtime.ena) {
+		struct sock_txtime so_txtime = {
+			.clockid = CLOCK_MONOTONIC,
+		};
+		__u64 txtime;
+
+		if (setsockopt(fd, SOL_SOCKET, SO_TXTIME,
+			       &so_txtime, sizeof(so_txtime)))
+			error(ERN_SOCKOPT, errno, "setsockopt TXTIME");
+
+		txtime = time_start_mono.tv_sec * (1000ULL * 1000 * 1000) +
+			 time_start_mono.tv_nsec +
+			 opt.txtime.delay * 1000;
+
+		cmsg = (struct cmsghdr *)(cbuf + cmsg_len);
+		cmsg_len += CMSG_SPACE(sizeof(txtime));
+		if (cbuf_sz < cmsg_len)
+			error(ERN_CMSG_WR, EFAULT, "cmsg buffer too small");
+
+		cmsg->cmsg_level = SOL_SOCKET;
+		cmsg->cmsg_type = SCM_TXTIME;
+		cmsg->cmsg_len = CMSG_LEN(sizeof(txtime));
+		memcpy(CMSG_DATA(cmsg), &txtime, sizeof(txtime));
+	}
+	if (opt.ts.ena) {
+		__u32 val = SOF_TIMESTAMPING_SOFTWARE |
+			    SOF_TIMESTAMPING_OPT_TSONLY;
+
+		if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING,
+			       &val, sizeof(val)))
+			error(ERN_SOCKOPT, errno, "setsockopt TIMESTAMPING");
+
+		cmsg = (struct cmsghdr *)(cbuf + cmsg_len);
+		cmsg_len += CMSG_SPACE(sizeof(__u32));
+		if (cbuf_sz < cmsg_len)
+			error(ERN_CMSG_WR, EFAULT, "cmsg buffer too small");
+
+		cmsg->cmsg_level = SOL_SOCKET;
+		cmsg->cmsg_type = SO_TIMESTAMPING;
+		cmsg->cmsg_len = CMSG_LEN(sizeof(__u32));
+		*(__u32 *)CMSG_DATA(cmsg) = SOF_TIMESTAMPING_TX_SCHED |
+					    SOF_TIMESTAMPING_TX_SOFTWARE;
+	}
+	if (opt.v6.exthdr.ena) {
+		cmsg = (struct cmsghdr *)(cbuf + cmsg_len);
+		cmsg_len += CMSG_SPACE(8);
+		if (cbuf_sz < cmsg_len)
+			error(ERN_CMSG_WR, EFAULT, "cmsg buffer too small");
+
+		cmsg->cmsg_level = SOL_IPV6;
+		cmsg->cmsg_type = opt.v6.exthdr.val;
+		cmsg->cmsg_len = CMSG_LEN(8);
+		*(__u64 *)CMSG_DATA(cmsg) = 0;
+	}
+
+	if (cmsg_len)
+		msg->msg_controllen = cmsg_len;
+	else
+		msg->msg_control = NULL;
+}
+
+static const char *cs_ts_info2str(unsigned int info)
+{
+	static const char *names[] = {
+		[SCM_TSTAMP_SND]	= "SND",
+		[SCM_TSTAMP_SCHED]	= "SCHED",
+		[SCM_TSTAMP_ACK]	= "ACK",
+	};
+
+	if (info < ARRAY_SIZE(names))
+		return names[info];
+	return "unknown";
+}
+
+static void
+cs_read_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz)
+{
+	struct sock_extended_err *see;
+	struct scm_timestamping *ts;
+	struct cmsghdr *cmsg;
+	int i, err;
+
+	if (!opt.ts.ena)
+		return;
+	msg->msg_control = cbuf;
+	msg->msg_controllen = cbuf_sz;
+
+	while (true) {
+		ts = NULL;
+		see = NULL;
+		memset(cbuf, 0, cbuf_sz);
+
+		err = recvmsg(fd, msg, MSG_ERRQUEUE);
+		if (err < 0) {
+			if (errno == EAGAIN)
+				break;
+			error(ERN_RECVERR, errno, "recvmsg ERRQ");
+		}
+
+		for (cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL;
+		     cmsg = CMSG_NXTHDR(msg, cmsg)) {
+			if (cmsg->cmsg_level == SOL_SOCKET &&
+			    cmsg->cmsg_type == SO_TIMESTAMPING_OLD) {
+				if (cmsg->cmsg_len < sizeof(*ts))
+					error(ERN_CMSG_RD, EINVAL, "TS cmsg");
+
+				ts = (void *)CMSG_DATA(cmsg);
+			}
+			if ((cmsg->cmsg_level == SOL_IP &&
+			     cmsg->cmsg_type == IP_RECVERR) ||
+			    (cmsg->cmsg_level == SOL_IPV6 &&
+			     cmsg->cmsg_type == IPV6_RECVERR)) {
+				if (cmsg->cmsg_len < sizeof(*see))
+					error(ERN_CMSG_RD, EINVAL, "sock_err cmsg");
+
+				see = (void *)CMSG_DATA(cmsg);
+			}
+		}
+
+		if (!ts)
+			error(ERN_CMSG_RCV, ENOENT, "TS cmsg not found");
+		if (!see)
+			error(ERN_CMSG_RCV, ENOENT, "sock_err cmsg not found");
+
+		for (i = 0; i < 3; i++) {
+			unsigned long long rel_time;
+
+			if (!ts->ts[i].tv_sec && !ts->ts[i].tv_nsec)
+				continue;
+
+			rel_time = (ts->ts[i].tv_sec - time_start_real.tv_sec) *
+				(1000ULL * 1000) +
+				(ts->ts[i].tv_nsec - time_start_real.tv_nsec) /
+				1000;
+			printf(" %5s ts%d %lluus\n",
+			       cs_ts_info2str(see->ee_info),
+			       i, rel_time);
+		}
+	}
+}
+
+static void ca_set_sockopts(int fd)
+{
+	if (opt.sockopt.mark &&
+	    setsockopt(fd, SOL_SOCKET, SO_MARK,
+		       &opt.sockopt.mark, sizeof(opt.sockopt.mark)))
+		error(ERN_SOCKOPT, errno, "setsockopt SO_MARK");
+	if (opt.sockopt.dontfrag &&
+	    setsockopt(fd, SOL_IPV6, IPV6_DONTFRAG,
+		       &opt.sockopt.dontfrag, sizeof(opt.sockopt.dontfrag)))
+		error(ERN_SOCKOPT, errno, "setsockopt IPV6_DONTFRAG");
+	if (opt.sockopt.tclass &&
+	    setsockopt(fd, SOL_IPV6, IPV6_TCLASS,
+		       &opt.sockopt.tclass, sizeof(opt.sockopt.tclass)))
+		error(ERN_SOCKOPT, errno, "setsockopt IPV6_TCLASS");
+	if (opt.sockopt.hlimit &&
+	    setsockopt(fd, SOL_IPV6, IPV6_UNICAST_HOPS,
+		       &opt.sockopt.hlimit, sizeof(opt.sockopt.hlimit)))
+		error(ERN_SOCKOPT, errno, "setsockopt IPV6_HOPLIMIT");
+	if (opt.sockopt.priority &&
+	    setsockopt(fd, SOL_SOCKET, SO_PRIORITY,
+		       &opt.sockopt.priority, sizeof(opt.sockopt.priority)))
+		error(ERN_SOCKOPT, errno, "setsockopt SO_PRIORITY");
+}
+
+int main(int argc, char *argv[])
+{
+	struct addrinfo hints, *ai;
+	struct iovec iov[1];
+	unsigned char *buf;
+	struct msghdr msg;
+	char cbuf[1024];
+	int err;
+	int fd;
+	int i;
+
+	cs_parse_args(argc, argv);
+
+	buf = malloc(opt.size);
+	memrnd(buf, opt.size);
+
+	memset(&hints, 0, sizeof(hints));
+	hints.ai_family = opt.sock.family;
+
+	ai = NULL;
+	err = getaddrinfo(opt.host, opt.service, &hints, &ai);
+	if (err) {
+		fprintf(stderr, "Can't resolve address [%s]:%s\n",
+			opt.host, opt.service);
+		return ERN_SOCK_CREATE;
+	}
+
+	if (ai->ai_family == AF_INET6 && opt.sock.proto == IPPROTO_ICMP)
+		opt.sock.proto = IPPROTO_ICMPV6;
+
+	fd = socket(ai->ai_family, opt.sock.type, opt.sock.proto);
+	if (fd < 0) {
+		fprintf(stderr, "Can't open socket: %s\n", strerror(errno));
+		freeaddrinfo(ai);
+		return ERN_RESOLVE;
+	}
+
+	if (opt.sock.proto == IPPROTO_ICMP) {
+		buf[0] = ICMP_ECHO;
+		buf[1] = 0;
+	} else if (opt.sock.proto == IPPROTO_ICMPV6) {
+		buf[0] = ICMPV6_ECHO_REQUEST;
+		buf[1] = 0;
+	} else if (opt.sock.type == SOCK_RAW) {
+		struct udphdr hdr = { 1, 2, htons(opt.size), 0 };
+		struct sockaddr_in6 *sin6 = (void *)ai->ai_addr;
+
+		memcpy(buf, &hdr, sizeof(hdr));
+		sin6->sin6_port = htons(opt.sock.proto);
+	}
+
+	ca_set_sockopts(fd);
+
+	if (clock_gettime(CLOCK_REALTIME, &time_start_real))
+		error(ERN_GETTIME, errno, "gettime REALTIME");
+	if (clock_gettime(CLOCK_MONOTONIC, &time_start_mono))
+		error(ERN_GETTIME, errno, "gettime MONOTONIC");
+
+	iov[0].iov_base = buf;
+	iov[0].iov_len = opt.size;
+
+	memset(&msg, 0, sizeof(msg));
+	msg.msg_name = ai->ai_addr;
+	msg.msg_namelen = ai->ai_addrlen;
+	msg.msg_iov = iov;
+	msg.msg_iovlen = 1;
+
+	cs_write_cmsg(fd, &msg, cbuf, sizeof(cbuf));
+
+	for (i = 0; i < opt.num_pkt; i++) {
+		err = sendmsg(fd, &msg, 0);
+		if (err < 0) {
+			if (!opt.silent_send)
+				fprintf(stderr, "send failed: %s\n", strerror(errno));
+			err = ERN_SEND;
+			goto err_out;
+		} else if (err != (int)opt.size) {
+			fprintf(stderr, "short send\n");
+			err = ERN_SEND_SHORT;
+			goto err_out;
+		}
+	}
+	err = ERN_SUCCESS;
+
+	if (opt.ts.ena) {
+		/* Make sure all timestamps have time to loop back */
+		usleep(opt.txtime.delay);
+
+		cs_read_cmsg(fd, &msg, cbuf, sizeof(cbuf));
+	}
+
+err_out:
+	close(fd);
+	freeaddrinfo(ai);
+	return err;
+}
diff --git a/tools/testing/selftests/net/cmsg_so_mark.sh b/tools/testing/selftests/net/cmsg_so_mark.sh
new file mode 100755
index 000000000000..772ad0cc2630
--- /dev/null
+++ b/tools/testing/selftests/net/cmsg_so_mark.sh
@@ -0,0 +1,78 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+IP4=172.16.0.1/24
+TGT4=172.16.0.2
+IP6=2001:db8:1::1/64
+TGT6=2001:db8:1::2
+MARK=1000
+
+cleanup()
+{
+    cleanup_ns $NS
+}
+
+trap cleanup EXIT
+
+# Namespaces
+setup_ns NS
+
+ip netns exec $NS sysctl -w net.ipv4.ping_group_range='0 2147483647' > /dev/null
+
+# Connectivity
+ip -netns $NS link add type dummy
+ip -netns $NS link set dev dummy0 up
+ip -netns $NS addr add $IP4 dev dummy0
+ip -netns $NS addr add $IP6 dev dummy0
+
+ip -netns $NS rule add fwmark $MARK lookup 300
+ip -6 -netns $NS rule add fwmark $MARK lookup 300
+ip -netns $NS route add prohibit any table 300
+ip -6 -netns $NS route add prohibit any table 300
+
+# Test
+BAD=0
+TOTAL=0
+
+check_result() {
+    ((TOTAL++))
+    if [ $1 -ne $2 ]; then
+	echo "  Case $3 returned $1, expected $2"
+	((BAD++))
+    fi
+}
+
+for ovr in setsock cmsg both; do
+    for i in 4 6; do
+	[ $i == 4 ] && TGT=$TGT4 || TGT=$TGT6
+
+	for p in u i r; do
+	    [ $p == "u" ] && prot=UDP
+	    [ $p == "i" ] && prot=ICMP
+	    [ $p == "r" ] && prot=RAW
+
+	    [ $ovr == "setsock" ] && m="-M"
+	    [ $ovr == "cmsg" ]    && m="-m"
+	    [ $ovr == "both" ]    && m="-M $MARK -m"
+
+	    ip netns exec $NS ./cmsg_sender -$i -p $p $m $((MARK + 1)) $TGT 1234
+	    check_result $? 0 "$prot $ovr - pass"
+
+	    [ $ovr == "diff" ] && m="-M $((MARK + 1)) -m"
+
+	    ip netns exec $NS ./cmsg_sender -$i -p $p $m $MARK -s $TGT 1234
+	    check_result $? 1 "$prot $ovr - rejection"
+	done
+    done
+done
+
+# Summary
+if [ $BAD -ne 0 ]; then
+    echo "FAIL - $BAD/$TOTAL cases failed"
+    exit 1
+else
+    echo "OK"
+    exit 0
+fi
diff --git a/tools/testing/selftests/net/cmsg_time.sh b/tools/testing/selftests/net/cmsg_time.sh
new file mode 100755
index 000000000000..af85267ad1e3
--- /dev/null
+++ b/tools/testing/selftests/net/cmsg_time.sh
@@ -0,0 +1,84 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+IP4=172.16.0.1/24
+TGT4=172.16.0.2
+IP6=2001:db8:1::1/64
+TGT6=2001:db8:1::2
+
+cleanup()
+{
+    cleanup_ns $NS
+}
+
+trap cleanup EXIT
+
+# Namespaces
+setup_ns NS
+
+ip netns exec $NS sysctl -w net.ipv4.ping_group_range='0 2147483647' > /dev/null
+
+# Connectivity
+ip -netns $NS link add type dummy
+ip -netns $NS link set dev dummy0 up
+ip -netns $NS addr add $IP4 dev dummy0
+ip -netns $NS addr add $IP6 dev dummy0
+
+# Need FQ for TXTIME
+ip netns exec $NS tc qdisc replace dev dummy0 root fq
+
+# Test
+BAD=0
+TOTAL=0
+
+check_result() {
+    ((TOTAL++))
+    if [ $1 -ne 0 ]; then
+	echo "  Case $4 returned $1, expected 0"
+	((BAD++))
+    elif [ "$2" != "$3" ]; then
+	echo "  Case $4 returned '$2', expected '$3'"
+	((BAD++))
+    fi
+}
+
+for i in "-4 $TGT4" "-6 $TGT6"; do
+    for p in u i r; do
+	[ $p == "u" ] && prot=UDPv${i:1:2}
+	[ $p == "i" ] && prot=ICMPv${i:1:2}
+	[ $p == "r" ] && prot=RAWv${i:1:2}
+
+	ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234)
+	check_result $? "$ts" "" "$prot - no options"
+
+	ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t | wc -l)
+	check_result $? "$ts" "2" "$prot - ts cnt"
+	ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t |
+		 sed -n "s/.*SCHED ts0 [0-9].*/OK/p")
+	check_result $? "$ts" "OK" "$prot - ts0 SCHED"
+	ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t |
+		 sed -n "s/.*SND ts0 [0-9].*/OK/p")
+	check_result $? "$ts" "OK" "$prot - ts0 SND"
+
+	ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t -d 1000 |
+		 awk '/SND/ { if ($3 > 1000) print "OK"; }')
+	check_result $? "$ts" "OK" "$prot - TXTIME abs"
+
+	ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t -d 1000 |
+		 awk '/SND/ {snd=$3}
+		      /SCHED/ {sch=$3}
+		      END { if (snd - sch > 500) print "OK"; }')
+	check_result $? "$ts" "OK" "$prot - TXTIME rel"
+    done
+done
+
+# Summary
+if [ $BAD -ne 0 ]; then
+    echo "FAIL - $BAD/$TOTAL cases failed"
+    exit 1
+else
+    echo "OK"
+    exit 0
+fi
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 4d5df8e1eee7..5e4390cac17e 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -1,7 +1,12 @@
 CONFIG_USER_NS=y
+CONFIG_NET_NS=y
+CONFIG_BONDING=m
 CONFIG_BPF_SYSCALL=y
 CONFIG_TEST_BPF=m
 CONFIG_NUMA=y
+CONFIG_RPS=y
+CONFIG_SYSFS=y
+CONFIG_PROC_SYSCTL=y
 CONFIG_NET_VRF=y
 CONFIG_NET_L3_MASTER_DEV=y
 CONFIG_IPV6=y
@@ -10,27 +15,88 @@ CONFIG_VETH=y
 CONFIG_NET_IPVTI=y
 CONFIG_IPV6_VTI=y
 CONFIG_DUMMY=y
+CONFIG_BRIDGE_VLAN_FILTERING=y
 CONFIG_BRIDGE=y
+CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_VLAN_8021Q=y
+CONFIG_GENEVE=m
 CONFIG_IFB=y
+CONFIG_INET_DIAG=y
+CONFIG_INET_ESP=y
+CONFIG_INET_ESP_OFFLOAD=y
+CONFIG_NET_FOU=y
+CONFIG_NET_FOU_IP_TUNNELS=y
+CONFIG_IP_GRE=m
 CONFIG_NETFILTER=y
 CONFIG_NETFILTER_ADVANCED=y
 CONFIG_NF_CONNTRACK=m
+CONFIG_IPV6_SIT=y
+CONFIG_IP_DCCP=m
 CONFIG_NF_NAT=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP6_NF_NAT=m
+CONFIG_IP6_NF_RAW=m
 CONFIG_IP_NF_NAT=m
+CONFIG_IP_NF_RAW=m
+CONFIG_IP_NF_TARGET_TTL=m
+CONFIG_IPV6_GRE=m
+CONFIG_IPV6_SEG6_LWTUNNEL=y
+CONFIG_L2TP_ETH=m
+CONFIG_L2TP_IP=m
+CONFIG_L2TP=m
+CONFIG_L2TP_V3=y
+CONFIG_MACSEC=m
+CONFIG_MACVLAN=y
+CONFIG_MACVTAP=y
+CONFIG_MPLS=y
+CONFIG_MPTCP=y
 CONFIG_NF_TABLES=m
 CONFIG_NF_TABLES_IPV6=y
 CONFIG_NF_TABLES_IPV4=y
 CONFIG_NFT_NAT=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NET_ACT_CSUM=m
+CONFIG_NET_ACT_CT=m
+CONFIG_NET_ACT_GACT=m
+CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_CLS_BASIC=m
+CONFIG_NET_CLS_BPF=m
+CONFIG_NET_CLS_MATCHALL=m
+CONFIG_NET_CLS_U32=m
+CONFIG_NET_IPGRE_DEMUX=m
+CONFIG_NET_IPGRE=m
+CONFIG_NET_IPIP=y
+CONFIG_NET_SCH_FQ_CODEL=m
+CONFIG_NET_SCH_HTB=m
 CONFIG_NET_SCH_FQ=m
 CONFIG_NET_SCH_ETF=m
 CONFIG_NET_SCH_NETEM=y
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NFT_COMPAT=m
+CONFIG_NF_FLOW_TABLE=m
+CONFIG_PSAMPLE=m
+CONFIG_TCP_MD5SIG=y
 CONFIG_TEST_BLACKHOLE_DEV=m
 CONFIG_KALLSYMS=y
+CONFIG_TLS=m
 CONFIG_TRACEPOINTS=y
 CONFIG_NET_DROP_MONITOR=m
 CONFIG_NETDEVSIM=m
-CONFIG_NET_FOU=m
+CONFIG_MPLS_ROUTING=m
+CONFIG_MPLS_IPTUNNEL=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_CLS_FLOWER=m
+CONFIG_NET_ACT_TUNNEL_KEY=m
+CONFIG_NET_ACT_MIRRED=m
+CONFIG_BAREUDP=m
+CONFIG_IPV6_IOAM6_LWTUNNEL=y
+CONFIG_CRYPTO_SM4_GENERIC=y
+CONFIG_AMT=m
+CONFIG_TUN=y
+CONFIG_VXLAN=m
+CONFIG_IP_SCTP=m
+CONFIG_NETFILTER_XT_MATCH_POLICY=m
+CONFIG_CRYPTO_ARIA=y
+CONFIG_XFRM_INTERFACE=m
+CONFIG_XFRM_USER=m
diff --git a/tools/testing/selftests/net/csum.c b/tools/testing/selftests/net/csum.c
new file mode 100644
index 000000000000..90eb06fefa59
--- /dev/null
+++ b/tools/testing/selftests/net/csum.c
@@ -0,0 +1,988 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Test hardware checksum offload: Rx + Tx, IPv4 + IPv6, TCP + UDP.
+ *
+ * The test runs on two machines to exercise the NIC. For this reason it
+ * is not integrated in kselftests.
+ *
+ *     CMD=$((./csum -[46] -[tu] -S $SADDR -D $DADDR -[RT] -r 1 $EXTRA_ARGS))
+ *
+ * Rx:
+ *
+ * The sender sends packets with a known checksum field using PF_INET(6)
+ * SOCK_RAW sockets.
+ *
+ * good packet: $CMD [-t]
+ * bad packet:  $CMD [-t] -E
+ *
+ * The receiver reads UDP packets with a UDP socket. This is not an
+ * option for TCP packets ('-t'). Optionally insert an iptables filter
+ * to avoid these entering the real protocol stack.
+ *
+ * The receiver also reads all packets with a PF_PACKET socket, to
+ * observe whether both good and bad packets arrive on the host. And to
+ * read the optional TP_STATUS_CSUM_VALID bit. This requires setting
+ * option PACKET_AUXDATA, and works only for CHECKSUM_UNNECESSARY.
+ *
+ * Tx:
+ *
+ * The sender needs to build CHECKSUM_PARTIAL packets to exercise tx
+ * checksum offload.
+ *
+ * The sender can sends packets with a UDP socket.
+ *
+ * Optionally crafts a packet that sums up to zero to verify that the
+ * device writes negative zero 0xFFFF in this case to distinguish from
+ * 0x0000 (checksum disabled), as required by RFC 768. Hit this case
+ * by choosing a specific source port.
+ *
+ * good packet: $CMD -U
+ * zero csum:   $CMD -U -Z
+ *
+ * The sender can also build packets with PF_PACKET with PACKET_VNET_HDR,
+ * to cover more protocols. PF_PACKET requires passing src and dst mac
+ * addresses.
+ *
+ * good packet: $CMD -s $smac -d $dmac -p [-t]
+ *
+ * Argument '-z' sends UDP packets with a 0x000 checksum disabled field,
+ * to verify that the NIC passes these packets unmodified.
+ *
+ * Argument '-e' adds a transport mode encapsulation header between
+ * network and transport header. This will fail for devices that parse
+ *  headers. Should work on devices that implement protocol agnostic tx
+ * checksum offload (NETIF_F_HW_CSUM).
+ *
+ * Argument '-r $SEED' optionally randomizes header, payload and length
+ * to increase coverage between packets sent. SEED 1 further chooses a
+ * different seed for each run (and logs this for reproducibility). It
+ * is advised to enable this for extra coverage in continuous testing.
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <asm/byteorder.h>
+#include <errno.h>
+#include <error.h>
+#include <linux/filter.h>
+#include <linux/if_packet.h>
+#include <linux/ipv6.h>
+#include <linux/virtio_net.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/if_ether.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "kselftest.h"
+
+static bool cfg_bad_csum;
+static int cfg_family = PF_INET6;
+static int cfg_num_pkt = 4;
+static bool cfg_do_rx = true;
+static bool cfg_do_tx = true;
+static bool cfg_encap;
+static char *cfg_ifname = "eth0";
+static char *cfg_mac_dst;
+static char *cfg_mac_src;
+static int cfg_proto = IPPROTO_UDP;
+static int cfg_payload_char = 'a';
+static int cfg_payload_len = 100;
+static uint16_t cfg_port_dst = 34000;
+static uint16_t cfg_port_src = 33000;
+static uint16_t cfg_port_src_encap = 33001;
+static unsigned int cfg_random_seed;
+static int cfg_rcvbuf = 1 << 22;	/* be able to queue large cfg_num_pkt */
+static bool cfg_send_pfpacket;
+static bool cfg_send_udp;
+static int cfg_timeout_ms = 2000;
+static bool cfg_zero_disable; /* skip checksum: set to zero (udp only) */
+static bool cfg_zero_sum;     /* create packet that adds up to zero */
+
+static struct sockaddr_in cfg_daddr4 = {.sin_family = AF_INET};
+static struct sockaddr_in cfg_saddr4 = {.sin_family = AF_INET};
+static struct sockaddr_in6 cfg_daddr6 = {.sin6_family = AF_INET6};
+static struct sockaddr_in6 cfg_saddr6 = {.sin6_family = AF_INET6};
+
+#define ENC_HEADER_LEN	(sizeof(struct udphdr) + sizeof(struct udp_encap_hdr))
+#define MAX_HEADER_LEN	(sizeof(struct ipv6hdr) + ENC_HEADER_LEN + sizeof(struct tcphdr))
+#define MAX_PAYLOAD_LEN 1024
+
+/* Trivial demo encap. Stand-in for transport layer protocols like ESP or PSP */
+struct udp_encap_hdr {
+	uint8_t nexthdr;
+	uint8_t padding[3];
+};
+
+/* Ipaddrs, for pseudo csum. Global var is ugly, pass through funcs was worse */
+static void *iph_addr_p;
+
+static unsigned long gettimeofday_ms(void)
+{
+	struct timeval tv;
+
+	gettimeofday(&tv, NULL);
+	return (tv.tv_sec * 1000UL) + (tv.tv_usec / 1000UL);
+}
+
+static uint32_t checksum_nofold(char *data, size_t len, uint32_t sum)
+{
+	uint16_t *words = (uint16_t *)data;
+	int i;
+
+	for (i = 0; i < len / 2; i++)
+		sum += words[i];
+
+	if (len & 1)
+		sum += ((unsigned char *)data)[len - 1];
+
+	return sum;
+}
+
+static uint16_t checksum_fold(void *data, size_t len, uint32_t sum)
+{
+	sum = checksum_nofold(data, len, sum);
+
+	while (sum > 0xFFFF)
+		sum = (sum & 0xFFFF) + (sum >> 16);
+
+	return ~sum;
+}
+
+static uint16_t checksum(void *th, uint16_t proto, size_t len)
+{
+	uint32_t sum;
+	int alen;
+
+	alen = cfg_family == PF_INET6 ? 32 : 8;
+
+	sum = checksum_nofold(iph_addr_p, alen, 0);
+	sum += htons(proto);
+	sum += htons(len);
+
+	/* With CHECKSUM_PARTIAL kernel expects non-inverted pseudo csum */
+	if (cfg_do_tx && cfg_send_pfpacket)
+		return ~checksum_fold(NULL, 0, sum);
+	else
+		return checksum_fold(th, len, sum);
+}
+
+static void *build_packet_ipv4(void *_iph, uint8_t proto, unsigned int len)
+{
+	struct iphdr *iph = _iph;
+
+	memset(iph, 0, sizeof(*iph));
+
+	iph->version = 4;
+	iph->ihl = 5;
+	iph->ttl = 8;
+	iph->protocol = proto;
+	iph->saddr = cfg_saddr4.sin_addr.s_addr;
+	iph->daddr = cfg_daddr4.sin_addr.s_addr;
+	iph->tot_len = htons(sizeof(*iph) + len);
+	iph->check = checksum_fold(iph, sizeof(*iph), 0);
+
+	iph_addr_p = &iph->saddr;
+
+	return iph + 1;
+}
+
+static void *build_packet_ipv6(void *_ip6h, uint8_t proto, unsigned int len)
+{
+	struct ipv6hdr *ip6h = _ip6h;
+
+	memset(ip6h, 0, sizeof(*ip6h));
+
+	ip6h->version = 6;
+	ip6h->payload_len = htons(len);
+	ip6h->nexthdr = proto;
+	ip6h->hop_limit = 64;
+	ip6h->saddr = cfg_saddr6.sin6_addr;
+	ip6h->daddr = cfg_daddr6.sin6_addr;
+
+	iph_addr_p = &ip6h->saddr;
+
+	return ip6h + 1;
+}
+
+static void *build_packet_udp(void *_uh)
+{
+	struct udphdr *uh = _uh;
+
+	uh->source = htons(cfg_port_src);
+	uh->dest = htons(cfg_port_dst);
+	uh->len = htons(sizeof(*uh) + cfg_payload_len);
+	uh->check = 0;
+
+	/* choose source port so that uh->check adds up to zero */
+	if (cfg_zero_sum) {
+		uh->source = 0;
+		uh->source = checksum(uh, IPPROTO_UDP, sizeof(*uh) + cfg_payload_len);
+
+		fprintf(stderr, "tx: changing sport: %hu -> %hu\n",
+			cfg_port_src, ntohs(uh->source));
+		cfg_port_src = ntohs(uh->source);
+	}
+
+	if (cfg_zero_disable)
+		uh->check = 0;
+	else
+		uh->check = checksum(uh, IPPROTO_UDP, sizeof(*uh) + cfg_payload_len);
+
+	if (cfg_bad_csum)
+		uh->check = ~uh->check;
+
+	fprintf(stderr, "tx: sending checksum: 0x%x\n", uh->check);
+	return uh + 1;
+}
+
+static void *build_packet_tcp(void *_th)
+{
+	struct tcphdr *th = _th;
+
+	th->source = htons(cfg_port_src);
+	th->dest = htons(cfg_port_dst);
+	th->doff = 5;
+	th->check = 0;
+
+	th->check = checksum(th, IPPROTO_TCP, sizeof(*th) + cfg_payload_len);
+
+	if (cfg_bad_csum)
+		th->check = ~th->check;
+
+	fprintf(stderr, "tx: sending checksum: 0x%x\n", th->check);
+	return th + 1;
+}
+
+static char *build_packet_udp_encap(void *_uh)
+{
+	struct udphdr *uh = _uh;
+	struct udp_encap_hdr *eh = _uh + sizeof(*uh);
+
+	/* outer dst == inner dst, to simplify BPF filter
+	 * outer src != inner src, to demultiplex on recv
+	 */
+	uh->dest = htons(cfg_port_dst);
+	uh->source = htons(cfg_port_src_encap);
+	uh->check = 0;
+	uh->len = htons(sizeof(*uh) +
+			sizeof(*eh) +
+			sizeof(struct tcphdr) +
+			cfg_payload_len);
+
+	eh->nexthdr = IPPROTO_TCP;
+
+	return build_packet_tcp(eh + 1);
+}
+
+static char *build_packet(char *buf, int max_len, int *len)
+{
+	uint8_t proto;
+	char *off;
+	int tlen;
+
+	if (cfg_random_seed) {
+		int *buf32 = (void *)buf;
+		int i;
+
+		for (i = 0; i < (max_len / sizeof(int)); i++)
+			buf32[i] = rand();
+	} else {
+		memset(buf, cfg_payload_char, max_len);
+	}
+
+	if (cfg_proto == IPPROTO_UDP)
+		tlen = sizeof(struct udphdr) + cfg_payload_len;
+	else
+		tlen = sizeof(struct tcphdr) + cfg_payload_len;
+
+	if (cfg_encap) {
+		proto = IPPROTO_UDP;
+		tlen += ENC_HEADER_LEN;
+	} else {
+		proto = cfg_proto;
+	}
+
+	if (cfg_family == PF_INET)
+		off = build_packet_ipv4(buf, proto, tlen);
+	else
+		off = build_packet_ipv6(buf, proto, tlen);
+
+	if (cfg_encap)
+		off = build_packet_udp_encap(off);
+	else if (cfg_proto == IPPROTO_UDP)
+		off = build_packet_udp(off);
+	else
+		off = build_packet_tcp(off);
+
+	/* only pass the payload, but still compute headers for cfg_zero_sum */
+	if (cfg_send_udp) {
+		*len = cfg_payload_len;
+		return off;
+	}
+
+	*len = off - buf + cfg_payload_len;
+	return buf;
+}
+
+static int open_inet(int ipproto, int protocol)
+{
+	int fd;
+
+	fd = socket(cfg_family, ipproto, protocol);
+	if (fd == -1)
+		error(1, errno, "socket inet");
+
+	if (cfg_family == PF_INET6) {
+		/* may have been updated by cfg_zero_sum */
+		cfg_saddr6.sin6_port = htons(cfg_port_src);
+
+		if (bind(fd, (void *)&cfg_saddr6, sizeof(cfg_saddr6)))
+			error(1, errno, "bind dgram 6");
+		if (connect(fd, (void *)&cfg_daddr6, sizeof(cfg_daddr6)))
+			error(1, errno, "connect dgram 6");
+	} else {
+		/* may have been updated by cfg_zero_sum */
+		cfg_saddr4.sin_port = htons(cfg_port_src);
+
+		if (bind(fd, (void *)&cfg_saddr4, sizeof(cfg_saddr4)))
+			error(1, errno, "bind dgram 4");
+		if (connect(fd, (void *)&cfg_daddr4, sizeof(cfg_daddr4)))
+			error(1, errno, "connect dgram 4");
+	}
+
+	return fd;
+}
+
+static int open_packet(void)
+{
+	int fd, one = 1;
+
+	fd = socket(PF_PACKET, SOCK_RAW, 0);
+	if (fd == -1)
+		error(1, errno, "socket packet");
+
+	if (setsockopt(fd, SOL_PACKET, PACKET_VNET_HDR, &one, sizeof(one)))
+		error(1, errno, "setsockopt packet_vnet_ndr");
+
+	return fd;
+}
+
+static void send_inet(int fd, const char *buf, int len)
+{
+	int ret;
+
+	ret = write(fd, buf, len);
+	if (ret == -1)
+		error(1, errno, "write");
+	if (ret != len)
+		error(1, 0, "write: %d", ret);
+}
+
+static void eth_str_to_addr(const char *str, unsigned char *eth)
+{
+	if (sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
+		   &eth[0], &eth[1], &eth[2], &eth[3], &eth[4], &eth[5]) != 6)
+		error(1, 0, "cannot parse mac addr %s", str);
+}
+
+static void send_packet(int fd, const char *buf, int len)
+{
+	struct virtio_net_hdr vh = {0};
+	struct sockaddr_ll addr = {0};
+	struct msghdr msg = {0};
+	struct ethhdr eth;
+	struct iovec iov[3];
+	int ret;
+
+	addr.sll_family = AF_PACKET;
+	addr.sll_halen = ETH_ALEN;
+	addr.sll_ifindex = if_nametoindex(cfg_ifname);
+	if (!addr.sll_ifindex)
+		error(1, errno, "if_nametoindex %s", cfg_ifname);
+
+	vh.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+	if (cfg_family == PF_INET6) {
+		vh.csum_start = sizeof(struct ethhdr) + sizeof(struct ipv6hdr);
+		addr.sll_protocol = htons(ETH_P_IPV6);
+	} else {
+		vh.csum_start = sizeof(struct ethhdr) + sizeof(struct iphdr);
+		addr.sll_protocol = htons(ETH_P_IP);
+	}
+
+	if (cfg_encap)
+		vh.csum_start += ENC_HEADER_LEN;
+
+	if (cfg_proto == IPPROTO_TCP) {
+		vh.csum_offset = __builtin_offsetof(struct tcphdr, check);
+		vh.hdr_len = vh.csum_start + sizeof(struct tcphdr);
+	} else {
+		vh.csum_offset = __builtin_offsetof(struct udphdr, check);
+		vh.hdr_len = vh.csum_start + sizeof(struct udphdr);
+	}
+
+	eth_str_to_addr(cfg_mac_src, eth.h_source);
+	eth_str_to_addr(cfg_mac_dst, eth.h_dest);
+	eth.h_proto = addr.sll_protocol;
+
+	iov[0].iov_base = &vh;
+	iov[0].iov_len = sizeof(vh);
+
+	iov[1].iov_base = &eth;
+	iov[1].iov_len = sizeof(eth);
+
+	iov[2].iov_base = (void *)buf;
+	iov[2].iov_len = len;
+
+	msg.msg_iov = iov;
+	msg.msg_iovlen = ARRAY_SIZE(iov);
+
+	msg.msg_name = &addr;
+	msg.msg_namelen = sizeof(addr);
+
+	ret = sendmsg(fd, &msg, 0);
+	if (ret == -1)
+		error(1, errno, "sendmsg packet");
+	if (ret != sizeof(vh) + sizeof(eth) + len)
+		error(1, errno, "sendmsg packet: %u", ret);
+}
+
+static int recv_prepare_udp(void)
+{
+	int fd;
+
+	fd = socket(cfg_family, SOCK_DGRAM, 0);
+	if (fd == -1)
+		error(1, errno, "socket r");
+
+	if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF,
+		       &cfg_rcvbuf, sizeof(cfg_rcvbuf)))
+		error(1, errno, "setsockopt SO_RCVBUF r");
+
+	if (cfg_family == PF_INET6) {
+		if (bind(fd, (void *)&cfg_daddr6, sizeof(cfg_daddr6)))
+			error(1, errno, "bind r");
+	} else {
+		if (bind(fd, (void *)&cfg_daddr4, sizeof(cfg_daddr4)))
+			error(1, errno, "bind r");
+	}
+
+	return fd;
+}
+
+/* Filter out all traffic that is not cfg_proto with our destination port.
+ *
+ * Otherwise background noise may cause PF_PACKET receive queue overflow,
+ * dropping the expected packets and failing the test.
+ */
+static void __recv_prepare_packet_filter(int fd, int off_nexthdr, int off_dport)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE),
+		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4),
+		BPF_STMT(BPF_LD + BPF_B + BPF_ABS, off_nexthdr),
+		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_encap ? IPPROTO_UDP : cfg_proto, 0, 2),
+		BPF_STMT(BPF_LD + BPF_H + BPF_ABS, off_dport),
+		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_port_dst, 1, 0),
+		BPF_STMT(BPF_RET + BPF_K, 0),
+		BPF_STMT(BPF_RET + BPF_K, 0xFFFF),
+	};
+	struct sock_fprog prog = {};
+
+	prog.filter = filter;
+	prog.len = ARRAY_SIZE(filter);
+	if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)))
+		error(1, errno, "setsockopt filter");
+}
+
+static void recv_prepare_packet_filter(int fd)
+{
+	const int off_dport = offsetof(struct tcphdr, dest); /* same for udp */
+
+	if (cfg_family == AF_INET)
+		__recv_prepare_packet_filter(fd, offsetof(struct iphdr, protocol),
+					     sizeof(struct iphdr) + off_dport);
+	else
+		__recv_prepare_packet_filter(fd, offsetof(struct ipv6hdr, nexthdr),
+					     sizeof(struct ipv6hdr) + off_dport);
+}
+
+static void recv_prepare_packet_bind(int fd)
+{
+	struct sockaddr_ll laddr = {0};
+
+	laddr.sll_family = AF_PACKET;
+
+	if (cfg_family == PF_INET)
+		laddr.sll_protocol = htons(ETH_P_IP);
+	else
+		laddr.sll_protocol = htons(ETH_P_IPV6);
+
+	laddr.sll_ifindex = if_nametoindex(cfg_ifname);
+	if (!laddr.sll_ifindex)
+		error(1, 0, "if_nametoindex %s", cfg_ifname);
+
+	if (bind(fd, (void *)&laddr, sizeof(laddr)))
+		error(1, errno, "bind pf_packet");
+}
+
+static int recv_prepare_packet(void)
+{
+	int fd, one = 1;
+
+	fd = socket(PF_PACKET, SOCK_DGRAM, 0);
+	if (fd == -1)
+		error(1, errno, "socket p");
+
+	if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF,
+		       &cfg_rcvbuf, sizeof(cfg_rcvbuf)))
+		error(1, errno, "setsockopt SO_RCVBUF p");
+
+	/* enable auxdata to recv checksum status (valid vs unknown) */
+	if (setsockopt(fd, SOL_PACKET, PACKET_AUXDATA, &one, sizeof(one)))
+		error(1, errno, "setsockopt auxdata");
+
+	/* install filter to restrict packet flow to match */
+	recv_prepare_packet_filter(fd);
+
+	/* bind to address family to start packet flow */
+	recv_prepare_packet_bind(fd);
+
+	return fd;
+}
+
+static int recv_udp(int fd)
+{
+	static char buf[MAX_PAYLOAD_LEN];
+	int ret, count = 0;
+
+	while (1) {
+		ret = recv(fd, buf, sizeof(buf), MSG_DONTWAIT);
+		if (ret == -1 && errno == EAGAIN)
+			break;
+		if (ret == -1)
+			error(1, errno, "recv r");
+
+		fprintf(stderr, "rx: udp: len=%u\n", ret);
+		count++;
+	}
+
+	return count;
+}
+
+static int recv_verify_csum(void *th, int len, uint16_t sport, uint16_t csum_field)
+{
+	uint16_t csum;
+
+	csum = checksum(th, cfg_proto, len);
+
+	fprintf(stderr, "rx: pkt: sport=%hu len=%u csum=0x%hx verify=0x%hx\n",
+		sport, len, csum_field, csum);
+
+	/* csum must be zero unless cfg_bad_csum indicates bad csum */
+	if (csum && !cfg_bad_csum) {
+		fprintf(stderr, "pkt: bad csum\n");
+		return 1;
+	} else if (cfg_bad_csum && !csum) {
+		fprintf(stderr, "pkt: good csum, while bad expected\n");
+		return 1;
+	}
+
+	if (cfg_zero_sum && csum_field != 0xFFFF) {
+		fprintf(stderr, "pkt: zero csum: field should be 0xFFFF, is 0x%hx\n", csum_field);
+		return 1;
+	}
+
+	return 0;
+}
+
+static int recv_verify_packet_tcp(void *th, int len)
+{
+	struct tcphdr *tcph = th;
+
+	if (len < sizeof(*tcph) || tcph->dest != htons(cfg_port_dst))
+		return -1;
+
+	return recv_verify_csum(th, len, ntohs(tcph->source), tcph->check);
+}
+
+static int recv_verify_packet_udp_encap(void *th, int len)
+{
+	struct udp_encap_hdr *eh = th;
+
+	if (len < sizeof(*eh) || eh->nexthdr != IPPROTO_TCP)
+		return -1;
+
+	return recv_verify_packet_tcp(eh + 1, len - sizeof(*eh));
+}
+
+static int recv_verify_packet_udp(void *th, int len)
+{
+	struct udphdr *udph = th;
+
+	if (len < sizeof(*udph))
+		return -1;
+
+	if (udph->dest != htons(cfg_port_dst))
+		return -1;
+
+	if (udph->source == htons(cfg_port_src_encap))
+		return recv_verify_packet_udp_encap(udph + 1,
+						    len - sizeof(*udph));
+
+	return recv_verify_csum(th, len, ntohs(udph->source), udph->check);
+}
+
+static int recv_verify_packet_ipv4(void *nh, int len)
+{
+	struct iphdr *iph = nh;
+	uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto;
+
+	if (len < sizeof(*iph) || iph->protocol != proto)
+		return -1;
+
+	iph_addr_p = &iph->saddr;
+	if (proto == IPPROTO_TCP)
+		return recv_verify_packet_tcp(iph + 1, len - sizeof(*iph));
+	else
+		return recv_verify_packet_udp(iph + 1, len - sizeof(*iph));
+}
+
+static int recv_verify_packet_ipv6(void *nh, int len)
+{
+	struct ipv6hdr *ip6h = nh;
+	uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto;
+
+	if (len < sizeof(*ip6h) || ip6h->nexthdr != proto)
+		return -1;
+
+	iph_addr_p = &ip6h->saddr;
+
+	if (proto == IPPROTO_TCP)
+		return recv_verify_packet_tcp(ip6h + 1, len - sizeof(*ip6h));
+	else
+		return recv_verify_packet_udp(ip6h + 1, len - sizeof(*ip6h));
+}
+
+/* return whether auxdata includes TP_STATUS_CSUM_VALID */
+static bool recv_verify_packet_csum(struct msghdr *msg)
+{
+	struct tpacket_auxdata *aux = NULL;
+	struct cmsghdr *cm;
+
+	if (msg->msg_flags & MSG_CTRUNC)
+		error(1, 0, "cmsg: truncated");
+
+	for (cm = CMSG_FIRSTHDR(msg); cm; cm = CMSG_NXTHDR(msg, cm)) {
+		if (cm->cmsg_level != SOL_PACKET ||
+		    cm->cmsg_type != PACKET_AUXDATA)
+			error(1, 0, "cmsg: level=%d type=%d\n",
+			      cm->cmsg_level, cm->cmsg_type);
+
+		if (cm->cmsg_len != CMSG_LEN(sizeof(struct tpacket_auxdata)))
+			error(1, 0, "cmsg: len=%lu expected=%lu",
+			      cm->cmsg_len, CMSG_LEN(sizeof(struct tpacket_auxdata)));
+
+		aux = (void *)CMSG_DATA(cm);
+	}
+
+	if (!aux)
+		error(1, 0, "cmsg: no auxdata");
+
+	return aux->tp_status & TP_STATUS_CSUM_VALID;
+}
+
+static int recv_packet(int fd)
+{
+	static char _buf[MAX_HEADER_LEN + MAX_PAYLOAD_LEN];
+	unsigned long total = 0, bad_csums = 0, bad_validations = 0;
+	char ctrl[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
+	struct pkt *buf = (void *)_buf;
+	struct msghdr msg = {0};
+	struct iovec iov;
+	int len, ret;
+
+	iov.iov_base = _buf;
+	iov.iov_len = sizeof(_buf);
+
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+
+	msg.msg_control = ctrl;
+	msg.msg_controllen = sizeof(ctrl);
+
+	while (1) {
+		msg.msg_flags = 0;
+
+		len = recvmsg(fd, &msg, MSG_DONTWAIT);
+		if (len == -1 && errno == EAGAIN)
+			break;
+		if (len == -1)
+			error(1, errno, "recv p");
+
+		if (cfg_family == PF_INET6)
+			ret = recv_verify_packet_ipv6(buf, len);
+		else
+			ret = recv_verify_packet_ipv4(buf, len);
+
+		if (ret == -1 /* skip: non-matching */)
+			continue;
+
+		total++;
+		if (ret == 1)
+			bad_csums++;
+
+		/* Fail if kernel returns valid for known bad csum.
+		 * Do not fail if kernel does not validate a good csum:
+		 * Absence of validation does not imply invalid.
+		 */
+		if (recv_verify_packet_csum(&msg) && cfg_bad_csum) {
+			fprintf(stderr, "cmsg: expected bad csum, pf_packet returns valid\n");
+			bad_validations++;
+		}
+	}
+
+	if (bad_csums || bad_validations)
+		error(1, 0, "rx: errors at pf_packet: total=%lu bad_csums=%lu bad_valids=%lu\n",
+		      total, bad_csums, bad_validations);
+
+	return total;
+}
+
+static void parse_args(int argc, char *const argv[])
+{
+	const char *daddr = NULL, *saddr = NULL;
+	int c;
+
+	while ((c = getopt(argc, argv, "46d:D:eEi:l:L:n:r:PRs:S:tTuUzZ")) != -1) {
+		switch (c) {
+		case '4':
+			cfg_family = PF_INET;
+			break;
+		case '6':
+			cfg_family = PF_INET6;
+			break;
+		case 'd':
+			cfg_mac_dst = optarg;
+			break;
+		case 'D':
+			daddr = optarg;
+			break;
+		case 'e':
+			cfg_encap = true;
+			break;
+		case 'E':
+			cfg_bad_csum = true;
+			break;
+		case 'i':
+			cfg_ifname = optarg;
+			break;
+		case 'l':
+			cfg_payload_len = strtol(optarg, NULL, 0);
+			break;
+		case 'L':
+			cfg_timeout_ms = strtol(optarg, NULL, 0) * 1000;
+			break;
+		case 'n':
+			cfg_num_pkt = strtol(optarg, NULL, 0);
+			break;
+		case 'r':
+			cfg_random_seed = strtol(optarg, NULL, 0);
+			break;
+		case 'P':
+			cfg_send_pfpacket = true;
+			break;
+		case 'R':
+			/* only Rx: used with two machine tests */
+			cfg_do_tx = false;
+			break;
+		case 's':
+			cfg_mac_src = optarg;
+			break;
+		case 'S':
+			saddr = optarg;
+			break;
+		case 't':
+			cfg_proto = IPPROTO_TCP;
+			break;
+		case 'T':
+			/* only Tx: used with two machine tests */
+			cfg_do_rx = false;
+			break;
+		case 'u':
+			cfg_proto = IPPROTO_UDP;
+			break;
+		case 'U':
+			/* send using real udp socket,
+			 * to exercise tx checksum offload
+			 */
+			cfg_send_udp = true;
+			break;
+		case 'z':
+			cfg_zero_disable = true;
+			break;
+		case 'Z':
+			cfg_zero_sum = true;
+			break;
+		default:
+			error(1, 0, "unknown arg %c", c);
+		}
+	}
+
+	if (!daddr || !saddr)
+		error(1, 0, "Must pass -D <daddr> and -S <saddr>");
+
+	if (cfg_do_tx && cfg_send_pfpacket && (!cfg_mac_src || !cfg_mac_dst))
+		error(1, 0, "Transmit with pf_packet requires mac addresses");
+
+	if (cfg_payload_len > MAX_PAYLOAD_LEN)
+		error(1, 0, "Payload length exceeds max");
+
+	if (cfg_proto != IPPROTO_UDP && (cfg_zero_sum || cfg_zero_disable))
+		error(1, 0, "Only UDP supports zero csum");
+
+	if (cfg_zero_sum && !cfg_send_udp)
+		error(1, 0, "Zero checksum conversion requires -U for tx csum offload");
+	if (cfg_zero_sum && cfg_bad_csum)
+		error(1, 0, "Cannot combine zero checksum conversion and invalid checksum");
+	if (cfg_zero_sum && cfg_random_seed)
+		error(1, 0, "Cannot combine zero checksum conversion with randomization");
+
+	if (cfg_family == PF_INET6) {
+		cfg_saddr6.sin6_port = htons(cfg_port_src);
+		cfg_daddr6.sin6_port = htons(cfg_port_dst);
+
+		if (inet_pton(cfg_family, daddr, &cfg_daddr6.sin6_addr) != 1)
+			error(1, errno, "Cannot parse ipv6 -D");
+		if (inet_pton(cfg_family, saddr, &cfg_saddr6.sin6_addr) != 1)
+			error(1, errno, "Cannot parse ipv6 -S");
+	} else {
+		cfg_saddr4.sin_port = htons(cfg_port_src);
+		cfg_daddr4.sin_port = htons(cfg_port_dst);
+
+		if (inet_pton(cfg_family, daddr, &cfg_daddr4.sin_addr) != 1)
+			error(1, errno, "Cannot parse ipv4 -D");
+		if (inet_pton(cfg_family, saddr, &cfg_saddr4.sin_addr) != 1)
+			error(1, errno, "Cannot parse ipv4 -S");
+	}
+
+	if (cfg_do_tx && cfg_random_seed) {
+		/* special case: time-based seed */
+		if (cfg_random_seed == 1)
+			cfg_random_seed = (unsigned int)gettimeofday_ms();
+		srand(cfg_random_seed);
+		fprintf(stderr, "randomization seed: %u\n", cfg_random_seed);
+	}
+}
+
+static void do_tx(void)
+{
+	static char _buf[MAX_HEADER_LEN + MAX_PAYLOAD_LEN];
+	char *buf;
+	int fd, len, i;
+
+	buf = build_packet(_buf, sizeof(_buf), &len);
+
+	if (cfg_send_pfpacket)
+		fd = open_packet();
+	else if (cfg_send_udp)
+		fd = open_inet(SOCK_DGRAM, 0);
+	else
+		fd = open_inet(SOCK_RAW, IPPROTO_RAW);
+
+	for (i = 0; i < cfg_num_pkt; i++) {
+		if (cfg_send_pfpacket)
+			send_packet(fd, buf, len);
+		else
+			send_inet(fd, buf, len);
+
+		/* randomize each packet individually to increase coverage */
+		if (cfg_random_seed) {
+			cfg_payload_len = rand() % MAX_PAYLOAD_LEN;
+			buf = build_packet(_buf, sizeof(_buf), &len);
+		}
+	}
+
+	if (close(fd))
+		error(1, errno, "close tx");
+}
+
+static void do_rx(int fdp, int fdr)
+{
+	unsigned long count_udp = 0, count_pkt = 0;
+	long tleft, tstop;
+	struct pollfd pfd;
+
+	tstop = gettimeofday_ms() + cfg_timeout_ms;
+	tleft = cfg_timeout_ms;
+
+	do {
+		pfd.events = POLLIN;
+		pfd.fd = fdp;
+		if (poll(&pfd, 1, tleft) == -1)
+			error(1, errno, "poll");
+
+		if (pfd.revents & POLLIN)
+			count_pkt += recv_packet(fdp);
+
+		if (cfg_proto == IPPROTO_UDP)
+			count_udp += recv_udp(fdr);
+
+		tleft = tstop - gettimeofday_ms();
+	} while (tleft > 0);
+
+	if (close(fdr))
+		error(1, errno, "close r");
+	if (close(fdp))
+		error(1, errno, "close p");
+
+	if (count_pkt < cfg_num_pkt)
+		error(1, 0, "rx: missing packets at pf_packet: %lu < %u",
+		      count_pkt, cfg_num_pkt);
+
+	if (cfg_proto == IPPROTO_UDP) {
+		if (cfg_bad_csum && count_udp)
+			error(1, 0, "rx: unexpected packets at udp");
+		if (!cfg_bad_csum && !count_udp)
+			error(1, 0, "rx: missing packets at udp");
+	}
+}
+
+int main(int argc, char *const argv[])
+{
+	int fdp = -1, fdr = -1;		/* -1 to silence -Wmaybe-uninitialized */
+
+	parse_args(argc, argv);
+
+	/* open receive sockets before transmitting */
+	if (cfg_do_rx) {
+		fdp = recv_prepare_packet();
+		fdr = recv_prepare_udp();
+	}
+
+	if (cfg_do_tx)
+		do_tx();
+
+	if (cfg_do_rx)
+		do_rx(fdp, fdr);
+
+	fprintf(stderr, "OK\n");
+	return 0;
+}
diff --git a/tools/testing/selftests/net/devlink_port_split.py b/tools/testing/selftests/net/devlink_port_split.py
index 58bb7e9b88ce..2d84c7a0be6b 100755
--- a/tools/testing/selftests/net/devlink_port_split.py
+++ b/tools/testing/selftests/net/devlink_port_split.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python3
+#!/usr/bin/env python3
 # SPDX-License-Identifier: GPL-2.0
 
 from subprocess import PIPE, Popen
@@ -18,6 +18,8 @@ import sys
 #
 
 
+# Kselftest framework requirement - SKIP code is 4
+KSFT_SKIP=4
 Port = collections.namedtuple('Port', 'bus_info name')
 
 
@@ -57,6 +59,8 @@ class devlink_ports(object):
         assert stderr == ""
         ports = json.loads(stdout)['port']
 
+        validate_devlink_output(ports, 'flavour')
+
         for port in ports:
             if dev in port:
                 if ports[port]['flavour'] == 'physical':
@@ -218,6 +222,27 @@ def split_splittable_port(port, k, lanes, dev):
     unsplit(port.bus_info)
 
 
+def validate_devlink_output(devlink_data, target_property=None):
+    """
+    Determine if test should be skipped by checking:
+      1. devlink_data contains values
+      2. The target_property exist in devlink_data
+    """
+    skip_reason = None
+    if any(devlink_data.values()):
+        if target_property:
+            skip_reason = "{} not found in devlink output, test skipped".format(target_property)
+            for key in devlink_data:
+                if target_property in devlink_data[key]:
+                    skip_reason = None
+    else:
+        skip_reason = 'devlink output is empty, test skipped'
+
+    if skip_reason:
+        print(skip_reason)
+        sys.exit(KSFT_SKIP)
+
+
 def make_parser():
     parser = argparse.ArgumentParser(description='A test for port splitting.')
     parser.add_argument('--dev',
@@ -238,6 +263,7 @@ def main(cmdline=None):
         stdout, stderr = run_command(cmd)
         assert stderr == ""
 
+        validate_devlink_output(json.loads(stdout))
         devs = json.loads(stdout)['dev']
         dev = list(devs.keys())[0]
 
@@ -249,6 +275,7 @@ def main(cmdline=None):
 
     ports = devlink_ports(dev)
 
+    found_max_lanes = False
     for port in ports.if_names:
         max_lanes = get_max_lanes(port.name)
 
@@ -271,6 +298,11 @@ def main(cmdline=None):
                 split_splittable_port(port, lane, max_lanes, dev)
 
                 lane //= 2
+        found_max_lanes = True
+
+    if not found_max_lanes:
+        print(f"Test not started, no port of device {dev} reports max_lanes")
+        sys.exit(KSFT_SKIP)
 
 
 if __name__ == "__main__":
diff --git a/tools/testing/selftests/net/drop_monitor_tests.sh b/tools/testing/selftests/net/drop_monitor_tests.sh
index b7650e30d18b..7c4818c971fc 100755
--- a/tools/testing/selftests/net/drop_monitor_tests.sh
+++ b/tools/testing/selftests/net/drop_monitor_tests.sh
@@ -2,10 +2,8 @@
 # SPDX-License-Identifier: GPL-2.0
 
 # This test is for checking drop monitor functionality.
-
+source lib.sh
 ret=0
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
 
 # all tests in this script. Can be overridden with -t option
 TESTS="
@@ -13,10 +11,6 @@ TESTS="
 	hw_drops
 "
 
-IP="ip -netns ns1"
-TC="tc -netns ns1"
-DEVLINK="devlink -N ns1"
-NS_EXEC="ip netns exec ns1"
 NETDEVSIM_PATH=/sys/bus/netdevsim/
 DEV_ADDR=1337
 DEV=netdevsim${DEV_ADDR}
@@ -43,7 +37,7 @@ setup()
 	modprobe netdevsim &> /dev/null
 
 	set -e
-	ip netns add ns1
+	setup_ns NS1
 	$IP link add dummy10 up type dummy
 
 	$NS_EXEC echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device
@@ -57,7 +51,7 @@ setup()
 cleanup()
 {
 	$NS_EXEC echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device
-	ip netns del ns1
+	cleanup_ns ${NS1}
 }
 
 sw_drops_test()
@@ -194,8 +188,15 @@ if [ $? -ne 0 ]; then
 	exit $ksft_skip
 fi
 
-# start clean
+# create netns first so we can get the namespace name
+setup_ns NS1
 cleanup &> /dev/null
+trap cleanup EXIT
+
+IP="ip -netns ${NS1}"
+TC="tc -netns ${NS1}"
+DEVLINK="devlink -N ${NS1}"
+NS_EXEC="ip netns exec ${NS1}"
 
 for t in $TESTS
 do
diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh
index fb5c55dd6df8..386ebd829df5 100755
--- a/tools/testing/selftests/net/fcnal-test.sh
+++ b/tools/testing/selftests/net/fcnal-test.sh
@@ -37,6 +37,10 @@
 #
 # server / client nomenclature relative to ns-A
 
+source lib.sh
+
+PATH=$PWD:$PWD/tools/testing/selftests/net:$PATH
+
 VERBOSE=0
 
 NSA_DEV=eth1
@@ -63,6 +67,14 @@ NSB_LO_IP=172.16.2.2
 NSA_LO_IP6=2001:db8:2::1
 NSB_LO_IP6=2001:db8:2::2
 
+# non-local addresses for freebind tests
+NL_IP=172.17.1.1
+NL_IP6=2001:db8:4::1
+
+# multicast and broadcast addresses
+MCAST_IP=224.0.0.1
+BCAST_IP=255.255.255.255
+
 MD5_PW=abc123
 MD5_WRONG_PW=abc1234
 
@@ -71,16 +83,15 @@ MCAST=ff02::1
 NSA_LINKIP6=
 NSB_LINKIP6=
 
-NSA=ns-A
-NSB=ns-B
-NSC=ns-C
-
-NSA_CMD="ip netns exec ${NSA}"
-NSB_CMD="ip netns exec ${NSB}"
-NSC_CMD="ip netns exec ${NSC}"
-
 which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
 
+# Check if FIPS mode is enabled
+if [ -f /proc/sys/crypto/fips_enabled ]; then
+	fips_enabled=`cat /proc/sys/crypto/fips_enabled`
+else
+	fips_enabled=0
+fi
+
 ################################################################################
 # utilities
 
@@ -89,6 +100,7 @@ log_test()
 	local rc=$1
 	local expected=$2
 	local msg="$3"
+	local ans
 
 	[ "${VERBOSE}" = "1" ] && echo
 
@@ -98,19 +110,20 @@ log_test()
 	else
 		nfail=$((nfail+1))
 		printf "TEST: %-70s  [FAIL]\n" "${msg}"
+		echo "    expected rc $expected; actual rc $rc"
 		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
 			echo
 			echo "hit enter to continue, 'q' to quit"
-			read a
-			[ "$a" = "q" ] && exit 1
+			read ans
+			[ "$ans" = "q" ] && exit 1
 		fi
 	fi
 
 	if [ "${PAUSE}" = "yes" ]; then
 		echo
 		echo "hit enter to continue, 'q' to quit"
-		read a
-		[ "$a" = "q" ] && exit 1
+		read ans
+		[ "$ans" = "q" ] && exit 1
 	fi
 
 	kill_procs
@@ -179,6 +192,15 @@ kill_procs()
 	sleep 1
 }
 
+set_ping_group()
+{
+	if [ "$VERBOSE" = "1" ]; then
+		echo "COMMAND: ${NSA_CMD} sysctl -q -w net.ipv4.ping_group_range='0 2147483647'"
+	fi
+
+	${NSA_CMD} sysctl -q -w net.ipv4.ping_group_range='0 2147483647'
+}
+
 do_run_cmd()
 {
 	local cmd="$*"
@@ -256,6 +278,28 @@ setup_cmd_nsb()
 	fi
 }
 
+setup_cmd_nsc()
+{
+	local cmd="$*"
+	local rc
+
+	run_cmd_nsc ${cmd}
+	rc=$?
+	if [ $rc -ne 0 ]; then
+		# show user the command if not done so already
+		if [ "$VERBOSE" = "0" ]; then
+			echo "setup command: $cmd"
+		fi
+		echo "failed. stopping tests"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+			echo
+			echo "hit enter to continue"
+			read a
+		fi
+		exit $rc
+	fi
+}
+
 # set sysctl values in NS-A
 set_sysctl()
 {
@@ -264,6 +308,12 @@ set_sysctl()
 	run_cmd sysctl -q -w $*
 }
 
+# get sysctl values in NS-A
+get_sysctl()
+{
+	${NSA_CMD} sysctl -n $*
+}
+
 ################################################################################
 # Setup for tests
 
@@ -273,6 +323,9 @@ addr2str()
 	127.0.0.1) echo "loopback";;
 	::1) echo "IPv6 loopback";;
 
+	${BCAST_IP}) echo "broadcast";;
+	${MCAST_IP}) echo "multicast";;
+
 	${NSA_IP})	echo "ns-A IP";;
 	${NSA_IP6})	echo "ns-A IPv6";;
 	${NSA_LO_IP})	echo "ns-A loopback IP";;
@@ -285,6 +338,9 @@ addr2str()
 	${NSB_LO_IP6})	echo "ns-B loopback IPv6";;
 	${NSB_LINKIP6}|${NSB_LINKIP6}%*) echo "ns-B IPv6 LLA";;
 
+	${NL_IP})       echo "nonlocal IP";;
+	${NL_IP6})      echo "nonlocal IPv6";;
+
 	${VRF_IP})	echo "VRF IP";;
 	${VRF_IP6})	echo "VRF IPv6";;
 
@@ -354,9 +410,6 @@ create_ns()
 	local addr=$2
 	local addr6=$3
 
-	ip netns add ${ns}
-
-	ip -netns ${ns} link set lo up
 	if [ "${addr}" != "-" ]; then
 		ip -netns ${ns} addr add dev lo ${addr}
 	fi
@@ -414,13 +467,33 @@ cleanup()
 		ip -netns ${NSA} link set dev ${NSA_DEV} down
 		ip -netns ${NSA} link del dev ${NSA_DEV}
 
-		ip netns del ${NSA}
+		ip netns pids ${NSA} | xargs kill 2>/dev/null
+		cleanup_ns ${NSA}
 	fi
 
-	ip netns del ${NSB}
+	ip netns pids ${NSB} | xargs kill 2>/dev/null
+	ip netns pids ${NSC} | xargs kill 2>/dev/null
+	cleanup_ns ${NSB} ${NSC}
+}
+
+cleanup_vrf_dup()
+{
+	ip link del ${NSA_DEV2} >/dev/null 2>&1
+	ip netns pids ${NSC} | xargs kill 2>/dev/null
 	ip netns del ${NSC} >/dev/null 2>&1
 }
 
+setup_vrf_dup()
+{
+	# some VRF tests use ns-C which has the same config as
+	# ns-B but for a device NOT in the VRF
+	setup_ns NSC
+	NSC_CMD="ip netns exec ${NSC}"
+	create_ns ${NSC} "-" "-"
+	connect_ns ${NSA} ${NSA_DEV2} ${NSA_IP}/24 ${NSA_IP6}/64 \
+		   ${NSC} ${NSC_DEV} ${NSB_IP}/24 ${NSB_IP6}/64
+}
+
 setup()
 {
 	local with_vrf=${1}
@@ -432,6 +505,10 @@ setup()
 	log_debug "Configuring network namespaces"
 	set -e
 
+	setup_ns NSA NSB
+	NSA_CMD="ip netns exec ${NSA}"
+	NSB_CMD="ip netns exec ${NSB}"
+
 	create_ns ${NSA} ${NSA_LO_IP}/32 ${NSA_LO_IP6}/128
 	create_ns ${NSB} ${NSB_LO_IP}/32 ${NSB_LO_IP6}/128
 	connect_ns ${NSA} ${NSA_DEV} ${NSA_IP}/24 ${NSA_IP6}/64 \
@@ -450,12 +527,6 @@ setup()
 
 		ip -netns ${NSB} ro add ${VRF_IP}/32 via ${NSA_IP} dev ${NSB_DEV}
 		ip -netns ${NSB} -6 ro add ${VRF_IP6}/128 via ${NSA_IP6} dev ${NSB_DEV}
-
-		# some VRF tests use ns-C which has the same config as
-		# ns-B but for a device NOT in the VRF
-		create_ns ${NSC} "-" "-"
-		connect_ns ${NSA} ${NSA_DEV2} ${NSA_IP}/24 ${NSA_IP6}/64 \
-			   ${NSC} ${NSC_DEV} ${NSB_IP}/24 ${NSB_IP6}/64
 	else
 		ip -netns ${NSA} ro add ${NSB_LO_IP}/32 via ${NSB_IP} dev ${NSA_DEV}
 		ip -netns ${NSA} ro add ${NSB_LO_IP6}/128 via ${NSB_IP6} dev ${NSA_DEV}
@@ -471,6 +542,40 @@ setup()
 	sleep 1
 }
 
+setup_lla_only()
+{
+	# make sure we are starting with a clean slate
+	kill_procs
+	cleanup 2>/dev/null
+
+	log_debug "Configuring network namespaces"
+	set -e
+
+	setup_ns NSA NSB NSC
+	NSA_CMD="ip netns exec ${NSA}"
+	NSB_CMD="ip netns exec ${NSB}"
+	NSC_CMD="ip netns exec ${NSC}"
+	create_ns ${NSA} "-" "-"
+	create_ns ${NSB} "-" "-"
+	create_ns ${NSC} "-" "-"
+	connect_ns ${NSA} ${NSA_DEV} "-" "-" \
+		   ${NSB} ${NSB_DEV} "-" "-"
+	connect_ns ${NSA} ${NSA_DEV2} "-" "-" \
+		   ${NSC} ${NSC_DEV}  "-" "-"
+
+	NSA_LINKIP6=$(get_linklocal ${NSA} ${NSA_DEV})
+	NSB_LINKIP6=$(get_linklocal ${NSB} ${NSB_DEV})
+	NSC_LINKIP6=$(get_linklocal ${NSC} ${NSC_DEV})
+
+	create_vrf ${NSA} ${VRF} ${VRF_TABLE} "-" "-"
+	ip -netns ${NSA} link set dev ${NSA_DEV} vrf ${VRF}
+	ip -netns ${NSA} link set dev ${NSA_DEV2} vrf ${VRF}
+
+	set +e
+
+	sleep 1
+}
+
 ################################################################################
 # IPv4
 
@@ -497,6 +602,20 @@ ipv4_ping_novrf()
 	done
 
 	#
+	# out, but don't use gateway if peer is not on link
+	#
+	a=${NSB_IP}
+	log_start
+	run_cmd ping -c 1 -w 1 -r ${a}
+	log_test_addr ${a} $? 0 "ping out (don't route), peer on link"
+
+	a=${NSB_LO_IP}
+	log_start
+	show_hint "Fails since peer is not on link"
+	run_cmd ping -c 1 -w 1 -r ${a}
+	log_test_addr ${a} $? 1 "ping out (don't route), peer not on link"
+
+	#
 	# in
 	#
 	for a in ${NSA_IP} ${NSA_LO_IP}
@@ -669,7 +788,7 @@ ipv4_ping_vrf()
 		log_start
 		show_hint "Fails since address on vrf device is out of device scope"
 		run_cmd ping -c1 -w1 -I ${NSA_DEV} ${a}
-		log_test_addr ${a} $? 1 "ping local, device bind"
+		log_test_addr ${a} $? 2 "ping local, device bind"
 	done
 
 	#
@@ -729,10 +848,16 @@ ipv4_ping()
 	setup
 	set_sysctl net.ipv4.raw_l3mdev_accept=1 2>/dev/null
 	ipv4_ping_novrf
+	setup
+	set_ping_group
+	ipv4_ping_novrf
 
 	log_subsection "With VRF"
 	setup "yes"
 	ipv4_ping_vrf
+	setup "yes"
+	set_ping_group
+	ipv4_ping_vrf
 }
 
 ################################################################################
@@ -749,9 +874,9 @@ ipv4_tcp_md5_novrf()
 
 	# basic use case
 	log_start
-	run_cmd nettest -s -M ${MD5_PW} -r ${NSB_IP} &
+	run_cmd nettest -s -M ${MD5_PW} -m ${NSB_IP} &
 	sleep 1
-	run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW}
+	run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
 	log_test $? 0 "MD5: Single address config"
 
 	# client sends MD5, server not configured
@@ -759,23 +884,23 @@ ipv4_tcp_md5_novrf()
 	show_hint "Should timeout due to MD5 mismatch"
 	run_cmd nettest -s &
 	sleep 1
-	run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW}
+	run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
 	log_test $? 2 "MD5: Server no config, client uses password"
 
 	# wrong password
 	log_start
 	show_hint "Should timeout since client uses wrong password"
-	run_cmd nettest -s -M ${MD5_PW} -r ${NSB_IP} &
+	run_cmd nettest -s -M ${MD5_PW} -m ${NSB_IP} &
 	sleep 1
-	run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW}
+	run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW}
 	log_test $? 2 "MD5: Client uses wrong password"
 
 	# client from different address
 	log_start
 	show_hint "Should timeout due to MD5 mismatch"
-	run_cmd nettest -s -M ${MD5_PW} -r ${NSB_LO_IP} &
+	run_cmd nettest -s -M ${MD5_PW} -m ${NSB_LO_IP} &
 	sleep 1
-	run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW}
+	run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
 	log_test $? 2 "MD5: Client address does not match address configured with password"
 
 	#
@@ -786,7 +911,7 @@ ipv4_tcp_md5_novrf()
 	log_start
 	run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} &
 	sleep 1
-	run_cmd_nsb nettest  -r ${NSA_IP} -M ${MD5_PW}
+	run_cmd_nsb nettest  -r ${NSA_IP} -X ${MD5_PW}
 	log_test $? 0 "MD5: Prefix config"
 
 	# client in prefix, wrong password
@@ -794,7 +919,7 @@ ipv4_tcp_md5_novrf()
 	show_hint "Should timeout since client uses wrong password"
 	run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} &
 	sleep 1
-	run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW}
+	run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW}
 	log_test $? 2 "MD5: Prefix config, client uses wrong password"
 
 	# client outside of prefix
@@ -802,7 +927,7 @@ ipv4_tcp_md5_novrf()
 	show_hint "Should timeout due to MD5 mismatch"
 	run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} &
 	sleep 1
-	run_cmd_nsb nettest -l ${NSB_LO_IP} -r ${NSA_IP} -M ${MD5_PW}
+	run_cmd_nsb nettest -c ${NSB_LO_IP} -r ${NSA_IP} -X ${MD5_PW}
 	log_test $? 2 "MD5: Prefix config, client address not in configured prefix"
 }
 
@@ -817,33 +942,33 @@ ipv4_tcp_md5()
 
 	# basic use case
 	log_start
-	run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} &
+	run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} &
 	sleep 1
-	run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW}
+	run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
 	log_test $? 0 "MD5: VRF: Single address config"
 
 	# client sends MD5, server not configured
 	log_start
 	show_hint "Should timeout since server does not have MD5 auth"
-	run_cmd nettest -s -d ${VRF} &
+	run_cmd nettest -s -I ${VRF} &
 	sleep 1
-	run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW}
+	run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
 	log_test $? 2 "MD5: VRF: Server no config, client uses password"
 
 	# wrong password
 	log_start
 	show_hint "Should timeout since client uses wrong password"
-	run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} &
+	run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} &
 	sleep 1
-	run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW}
+	run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW}
 	log_test $? 2 "MD5: VRF: Client uses wrong password"
 
 	# client from different address
 	log_start
 	show_hint "Should timeout since server config differs from client"
-	run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_LO_IP} &
+	run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_LO_IP} &
 	sleep 1
-	run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW}
+	run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
 	log_test $? 2 "MD5: VRF: Client address does not match address configured with password"
 
 	#
@@ -852,25 +977,25 @@ ipv4_tcp_md5()
 
 	# client in prefix
 	log_start
-	run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} &
+	run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} &
 	sleep 1
-	run_cmd_nsb nettest  -r ${NSA_IP} -M ${MD5_PW}
+	run_cmd_nsb nettest  -r ${NSA_IP} -X ${MD5_PW}
 	log_test $? 0 "MD5: VRF: Prefix config"
 
 	# client in prefix, wrong password
 	log_start
 	show_hint "Should timeout since client uses wrong password"
-	run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} &
+	run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} &
 	sleep 1
-	run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW}
+	run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW}
 	log_test $? 2 "MD5: VRF: Prefix config, client uses wrong password"
 
 	# client outside of prefix
 	log_start
 	show_hint "Should timeout since client address is outside of prefix"
-	run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} &
+	run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} &
 	sleep 1
-	run_cmd_nsb nettest -l ${NSB_LO_IP} -r ${NSA_IP} -M ${MD5_PW}
+	run_cmd_nsb nettest -c ${NSB_LO_IP} -r ${NSA_IP} -X ${MD5_PW}
 	log_test $? 2 "MD5: VRF: Prefix config, client address not in configured prefix"
 
 	#
@@ -878,76 +1003,183 @@ ipv4_tcp_md5()
 	#
 
 	log_start
-	run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} &
-	run_cmd nettest -s -M ${MD5_WRONG_PW} -r ${NSB_IP} &
+	run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} &
+	run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} &
 	sleep 1
-	run_cmd_nsb nettest  -r ${NSA_IP} -M ${MD5_PW}
+	run_cmd_nsb nettest  -r ${NSA_IP} -X ${MD5_PW}
 	log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF"
 
 	log_start
-	run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} &
-	run_cmd nettest -s -M ${MD5_WRONG_PW} -r ${NSB_IP} &
+	run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} &
+	run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} &
 	sleep 1
-	run_cmd_nsc nettest  -r ${NSA_IP} -M ${MD5_WRONG_PW}
+	run_cmd_nsc nettest  -r ${NSA_IP} -X ${MD5_WRONG_PW}
 	log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF"
 
 	log_start
 	show_hint "Should timeout since client in default VRF uses VRF password"
-	run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} &
-	run_cmd nettest -s -M ${MD5_WRONG_PW} -r ${NSB_IP} &
+	run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} &
+	run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} &
 	sleep 1
-	run_cmd_nsc nettest -r ${NSA_IP} -M ${MD5_PW}
+	run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW}
 	log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF with VRF pw"
 
 	log_start
 	show_hint "Should timeout since client in VRF uses default VRF password"
-	run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} &
-	run_cmd nettest -s -M ${MD5_WRONG_PW} -r ${NSB_IP} &
+	run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} &
+	run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} &
 	sleep 1
-	run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW}
+	run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW}
 	log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF with default VRF pw"
 
 	log_start
-	run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} &
+	run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} &
 	run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} &
 	sleep 1
-	run_cmd_nsb nettest  -r ${NSA_IP} -M ${MD5_PW}
+	run_cmd_nsb nettest  -r ${NSA_IP} -X ${MD5_PW}
 	log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF"
 
 	log_start
-	run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} &
+	run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} &
 	run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} &
 	sleep 1
-	run_cmd_nsc nettest  -r ${NSA_IP} -M ${MD5_WRONG_PW}
+	run_cmd_nsc nettest  -r ${NSA_IP} -X ${MD5_WRONG_PW}
 	log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF"
 
 	log_start
 	show_hint "Should timeout since client in default VRF uses VRF password"
-	run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} &
+	run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} &
 	run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} &
 	sleep 1
-	run_cmd_nsc nettest -r ${NSA_IP} -M ${MD5_PW}
+	run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW}
 	log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF with VRF pw"
 
 	log_start
 	show_hint "Should timeout since client in VRF uses default VRF password"
-	run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} &
+	run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} &
 	run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} &
 	sleep 1
-	run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW}
+	run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW}
 	log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF with default VRF pw"
 
 	#
 	# negative tests
 	#
 	log_start
-	run_cmd nettest -s -d ${NSA_DEV} -M ${MD5_PW} -r ${NSB_IP}
+	run_cmd nettest -s -I ${NSA_DEV} -M ${MD5_PW} -m ${NSB_IP}
 	log_test $? 1 "MD5: VRF: Device must be a VRF - single address"
 
 	log_start
-	run_cmd nettest -s -d ${NSA_DEV} -M ${MD5_PW} -m ${NS_NET}
+	run_cmd nettest -s -I ${NSA_DEV} -M ${MD5_PW} -m ${NS_NET}
 	log_test $? 1 "MD5: VRF: Device must be a VRF - prefix"
 
+	test_ipv4_md5_vrf__vrf_server__no_bind_ifindex
+	test_ipv4_md5_vrf__global_server__bind_ifindex0
+}
+
+test_ipv4_md5_vrf__vrf_server__no_bind_ifindex()
+{
+	log_start
+	show_hint "Simulates applications using VRF without TCP_MD5SIG_FLAG_IFINDEX"
+	run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex &
+	sleep 1
+	run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
+	log_test $? 0 "MD5: VRF: VRF-bound server, unbound key accepts connection"
+
+	log_start
+	show_hint "Binding both the socket and the key is not required but it works"
+	run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex &
+	sleep 1
+	run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
+	log_test $? 0 "MD5: VRF: VRF-bound server, bound key accepts connection"
+}
+
+test_ipv4_md5_vrf__global_server__bind_ifindex0()
+{
+	# This particular test needs tcp_l3mdev_accept=1 for Global server to accept VRF connections
+	local old_tcp_l3mdev_accept
+	old_tcp_l3mdev_accept=$(get_sysctl net.ipv4.tcp_l3mdev_accept)
+	set_sysctl net.ipv4.tcp_l3mdev_accept=1
+
+	log_start
+	run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex &
+	sleep 1
+	run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
+	log_test $? 2 "MD5: VRF: Global server, Key bound to ifindex=0 rejects VRF connection"
+
+	log_start
+	run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex &
+	sleep 1
+	run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW}
+	log_test $? 0 "MD5: VRF: Global server, key bound to ifindex=0 accepts non-VRF connection"
+	log_start
+
+	run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex &
+	sleep 1
+	run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
+	log_test $? 0 "MD5: VRF: Global server, key not bound to ifindex accepts VRF connection"
+
+	log_start
+	run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex &
+	sleep 1
+	run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW}
+	log_test $? 0 "MD5: VRF: Global server, key not bound to ifindex accepts non-VRF connection"
+
+	# restore value
+	set_sysctl net.ipv4.tcp_l3mdev_accept="$old_tcp_l3mdev_accept"
+}
+
+ipv4_tcp_dontroute()
+{
+	local syncookies=$1
+	local nsa_syncookies
+	local nsb_syncookies
+	local a
+
+	#
+	# Link local connection tests (SO_DONTROUTE).
+	# Connections should succeed only when the remote IP address is
+	# on link (doesn't need to be routed through a gateway).
+	#
+
+	nsa_syncookies=$(ip netns exec "${NSA}" sysctl -n net.ipv4.tcp_syncookies)
+	nsb_syncookies=$(ip netns exec "${NSB}" sysctl -n net.ipv4.tcp_syncookies)
+	ip netns exec "${NSA}" sysctl -wq net.ipv4.tcp_syncookies=${syncookies}
+	ip netns exec "${NSB}" sysctl -wq net.ipv4.tcp_syncookies=${syncookies}
+
+	# Test with eth1 address (on link).
+
+	a=${NSB_IP}
+	log_start
+	do_run_cmd nettest -B -N "${NSA}" -O "${NSB}" -r ${a} --client-dontroute
+	log_test_addr ${a} $? 0 "SO_DONTROUTE client, syncookies=${syncookies}"
+
+	a=${NSB_IP}
+	log_start
+	do_run_cmd nettest -B -N "${NSA}" -O "${NSB}" -r ${a} --server-dontroute
+	log_test_addr ${a} $? 0 "SO_DONTROUTE server, syncookies=${syncookies}"
+
+	# Test with loopback address (routed).
+	#
+	# The client would use the eth1 address as source IP by default.
+	# Therefore, we need to use the -c option here, to force the use of the
+	# routed (loopback) address as source IP (so that the server will try
+	# to respond to a routed address and not a link local one).
+
+	a=${NSB_LO_IP}
+	log_start
+	show_hint "Should fail 'Network is unreachable' since server is not on link"
+	do_run_cmd nettest -B -N "${NSA}" -O "${NSB}" -c "${NSA_LO_IP}" -r ${a} --client-dontroute
+	log_test_addr ${a} $? 1 "SO_DONTROUTE client, syncookies=${syncookies}"
+
+	a=${NSB_LO_IP}
+	log_start
+	show_hint "Should timeout since server cannot respond (client is not on link)"
+	do_run_cmd nettest -B -N "${NSA}" -O "${NSB}" -c "${NSA_LO_IP}" -r ${a} --server-dontroute
+	log_test_addr ${a} $? 2 "SO_DONTROUTE server, syncookies=${syncookies}"
+
+	ip netns exec "${NSB}" sysctl -wq net.ipv4.tcp_syncookies=${nsb_syncookies}
+	ip netns exec "${NSA}" sysctl -wq net.ipv4.tcp_syncookies=${nsa_syncookies}
 }
 
 ipv4_tcp_novrf()
@@ -968,7 +1200,7 @@ ipv4_tcp_novrf()
 
 	a=${NSA_IP}
 	log_start
-	run_cmd nettest -s -d ${NSA_DEV} &
+	run_cmd nettest -s -I ${NSA_DEV} &
 	sleep 1
 	run_cmd_nsb nettest -r ${a}
 	log_test_addr ${a} $? 0 "Device server"
@@ -1024,7 +1256,7 @@ ipv4_tcp_novrf()
 
 	a=${NSA_IP}
 	log_start
-	run_cmd nettest -s -d ${NSA_DEV} &
+	run_cmd nettest -s -I ${NSA_DEV} &
 	sleep 1
 	run_cmd nettest -r ${a} -0 ${a}
 	log_test_addr ${a} $? 0 "Device server, unbound client, local connection"
@@ -1033,7 +1265,7 @@ ipv4_tcp_novrf()
 	do
 		log_start
 		show_hint "Should fail 'Connection refused' since addresses on loopback are out of device scope"
-		run_cmd nettest -s -d ${NSA_DEV} &
+		run_cmd nettest -s -I ${NSA_DEV} &
 		sleep 1
 		run_cmd nettest -r ${a}
 		log_test_addr ${a} $? 1 "Device server, unbound client, local connection"
@@ -1058,7 +1290,7 @@ ipv4_tcp_novrf()
 
 	a=${NSA_IP}
 	log_start
-	run_cmd nettest -s -d ${NSA_DEV} -2 ${NSA_DEV} &
+	run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} &
 	sleep 1
 	run_cmd nettest  -d ${NSA_DEV} -r ${a} -0 ${a}
 	log_test_addr ${a} $? 0 "Device server, device client, local connection"
@@ -1068,7 +1300,10 @@ ipv4_tcp_novrf()
 	run_cmd nettest -d ${NSA_DEV} -r ${a}
 	log_test_addr ${a} $? 1 "No server, device client, local conn"
 
-	ipv4_tcp_md5_novrf
+	[ "$fips_enabled" = "1" ] || ipv4_tcp_md5_novrf
+
+	ipv4_tcp_dontroute 0
+	ipv4_tcp_dontroute 2
 }
 
 ipv4_tcp_vrf()
@@ -1093,13 +1328,13 @@ ipv4_tcp_vrf()
 		log_test_addr ${a} $? 1 "Global server"
 
 		log_start
-		run_cmd nettest -s -d ${VRF} -2 ${VRF} &
+		run_cmd nettest -s -I ${VRF} -3 ${VRF} &
 		sleep 1
 		run_cmd_nsb nettest -r ${a}
 		log_test_addr ${a} $? 0 "VRF server"
 
 		log_start
-		run_cmd nettest -s -d ${NSA_DEV} -2 ${NSA_DEV} &
+		run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} &
 		sleep 1
 		run_cmd_nsb nettest -r ${a}
 		log_test_addr ${a} $? 0 "Device server"
@@ -1122,7 +1357,11 @@ ipv4_tcp_vrf()
 	log_test_addr ${a} $? 1 "Global server, local connection"
 
 	# run MD5 tests
-	ipv4_tcp_md5
+	if [ "$fips_enabled" = "0" ]; then
+		setup_vrf_dup
+		ipv4_tcp_md5
+		cleanup_vrf_dup
+	fi
 
 	#
 	# enable VRF global server
@@ -1134,14 +1373,14 @@ ipv4_tcp_vrf()
 	do
 		log_start
 		show_hint "client socket should be bound to VRF"
-		run_cmd nettest -s -2 ${VRF} &
+		run_cmd nettest -s -3 ${VRF} &
 		sleep 1
 		run_cmd_nsb nettest -r ${a}
 		log_test_addr ${a} $? 0 "Global server"
 
 		log_start
 		show_hint "client socket should be bound to VRF"
-		run_cmd nettest -s -d ${VRF} -2 ${VRF} &
+		run_cmd nettest -s -I ${VRF} -3 ${VRF} &
 		sleep 1
 		run_cmd_nsb nettest -r ${a}
 		log_test_addr ${a} $? 0 "VRF server"
@@ -1156,7 +1395,7 @@ ipv4_tcp_vrf()
 	a=${NSA_IP}
 	log_start
 	show_hint "client socket should be bound to device"
-	run_cmd nettest -s -d ${NSA_DEV} -2 ${NSA_DEV} &
+	run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} &
 	sleep 1
 	run_cmd_nsb nettest -r ${a}
 	log_test_addr ${a} $? 0 "Device server"
@@ -1166,7 +1405,7 @@ ipv4_tcp_vrf()
 	do
 		log_start
 		show_hint "Should fail 'Connection refused' since client is not bound to VRF"
-		run_cmd nettest -s -d ${VRF} &
+		run_cmd nettest -s -I ${VRF} &
 		sleep 1
 		run_cmd nettest -r ${a}
 		log_test_addr ${a} $? 1 "Global server, local connection"
@@ -1203,7 +1442,7 @@ ipv4_tcp_vrf()
 	for a in ${NSA_IP} ${VRF_IP} 127.0.0.1
 	do
 		log_start
-		run_cmd nettest -s -d ${VRF} -2 ${VRF} &
+		run_cmd nettest -s -I ${VRF} -3 ${VRF} &
 		sleep 1
 		run_cmd nettest -r ${a} -d ${VRF} -0 ${a}
 		log_test_addr ${a} $? 0 "VRF server, VRF client, local connection"
@@ -1211,26 +1450,26 @@ ipv4_tcp_vrf()
 
 	a=${NSA_IP}
 	log_start
-	run_cmd nettest -s -d ${VRF} -2 ${VRF} &
+	run_cmd nettest -s -I ${VRF} -3 ${VRF} &
 	sleep 1
 	run_cmd nettest -r ${a} -d ${NSA_DEV} -0 ${a}
 	log_test_addr ${a} $? 0 "VRF server, device client, local connection"
 
 	log_start
 	show_hint "Should fail 'No route to host' since client is out of VRF scope"
-	run_cmd nettest -s -d ${VRF} &
+	run_cmd nettest -s -I ${VRF} &
 	sleep 1
 	run_cmd nettest -r ${a}
 	log_test_addr ${a} $? 1 "VRF server, unbound client, local connection"
 
 	log_start
-	run_cmd nettest -s -d ${NSA_DEV} -2 ${NSA_DEV} &
+	run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} &
 	sleep 1
 	run_cmd nettest -r ${a} -d ${VRF} -0 ${a}
 	log_test_addr ${a} $? 0 "Device server, VRF client, local connection"
 
 	log_start
-	run_cmd nettest -s -d ${NSA_DEV} -2 ${NSA_DEV} &
+	run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} &
 	sleep 1
 	run_cmd nettest -r ${a} -d ${NSA_DEV} -0 ${a}
 	log_test_addr ${a} $? 0 "Device server, device client, local connection"
@@ -1269,7 +1508,7 @@ ipv4_udp_novrf()
 	for a in ${NSA_IP} ${NSA_LO_IP}
 	do
 		log_start
-		run_cmd nettest -D -s -2 ${NSA_DEV} &
+		run_cmd nettest -D -s -3 ${NSA_DEV} &
 		sleep 1
 		run_cmd_nsb nettest -D -r ${a}
 		log_test_addr ${a} $? 0 "Global server"
@@ -1282,7 +1521,7 @@ ipv4_udp_novrf()
 
 	a=${NSA_IP}
 	log_start
-	run_cmd nettest -D -d ${NSA_DEV} -s -2 ${NSA_DEV} &
+	run_cmd nettest -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
 	sleep 1
 	run_cmd_nsb nettest -D -r ${a}
 	log_test_addr ${a} $? 0 "Device server"
@@ -1317,6 +1556,13 @@ ipv4_udp_novrf()
 		log_test_addr ${a} $? 0 "Client, device bind via IP_UNICAST_IF"
 
 		log_start
+		run_cmd_nsb nettest -D -s &
+		sleep 1
+		run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S -0 ${NSA_IP} -U
+		log_test_addr ${a} $? 0 "Client, device bind via IP_UNICAST_IF, with connect()"
+
+
+		log_start
 		show_hint "Should fail 'Connection refused'"
 		run_cmd nettest -D -r ${a}
 		log_test_addr ${a} $? 1 "No server, unbound client"
@@ -1341,7 +1587,7 @@ ipv4_udp_novrf()
 
 	a=${NSA_IP}
 	log_start
-	run_cmd nettest -s -D -d ${NSA_DEV} -2 ${NSA_DEV} &
+	run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} &
 	sleep 1
 	run_cmd nettest -D -r ${a}
 	log_test_addr ${a} $? 0 "Device server, unbound client, local connection"
@@ -1350,7 +1596,7 @@ ipv4_udp_novrf()
 	do
 		log_start
 		show_hint "Should fail 'Connection refused' since address is out of device scope"
-		run_cmd nettest -s -D -d ${NSA_DEV} &
+		run_cmd nettest -s -D -I ${NSA_DEV} &
 		sleep 1
 		run_cmd nettest -D -r ${a}
 		log_test_addr ${a} $? 1 "Device server, unbound client, local connection"
@@ -1375,6 +1621,13 @@ ipv4_udp_novrf()
 	run_cmd nettest -D -d ${NSA_DEV} -S -r ${a}
 	log_test_addr ${a} $? 0 "Global server, device client via IP_UNICAST_IF, local connection"
 
+	log_start
+	run_cmd nettest -s -D &
+	sleep 1
+	run_cmd nettest -D -d ${NSA_DEV} -S -r ${a} -U
+	log_test_addr ${a} $? 0 "Global server, device client via IP_UNICAST_IF, local connection, with connect()"
+
+
 	# IPv4 with device bind has really weird behavior - it overrides the
 	# fib lookup, generates an rtable and tries to send the packet. This
 	# causes failures for local traffic at different places
@@ -1400,11 +1653,20 @@ ipv4_udp_novrf()
 		sleep 1
 		run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S
 		log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection"
+
+		log_start
+		show_hint "Should fail since addresses on loopback are out of device scope"
+		run_cmd nettest -D -s &
+		sleep 1
+		run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S -U
+		log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection, with connect()"
+
+
 	done
 
 	a=${NSA_IP}
 	log_start
-	run_cmd nettest -D -s -d ${NSA_DEV} -2 ${NSA_DEV} &
+	run_cmd nettest -D -s -I ${NSA_DEV} -3 ${NSA_DEV} &
 	sleep 1
 	run_cmd nettest -D -d ${NSA_DEV} -r ${a} -0 ${a}
 	log_test_addr ${a} $? 0 "Device server, device client, local conn"
@@ -1412,6 +1674,23 @@ ipv4_udp_novrf()
 	log_start
 	run_cmd nettest -D -d ${NSA_DEV} -r ${a}
 	log_test_addr ${a} $? 2 "No server, device client, local conn"
+
+	#
+	# Link local connection tests (SO_DONTROUTE).
+	# Connections should succeed only when the remote IP address is
+	# on link (doesn't need to be routed through a gateway).
+	#
+
+	a=${NSB_IP}
+	log_start
+	do_run_cmd nettest -B -D -N "${NSA}" -O "${NSB}" -r ${a} --client-dontroute
+	log_test_addr ${a} $? 0 "SO_DONTROUTE client"
+
+	a=${NSB_LO_IP}
+	log_start
+	show_hint "Should fail 'Network is unreachable' since server is not on link"
+	do_run_cmd nettest -B -D -N "${NSA}" -O "${NSB}" -r ${a} --client-dontroute
+	log_test_addr ${a} $? 1 "SO_DONTROUTE client"
 }
 
 ipv4_udp_vrf()
@@ -1435,13 +1714,13 @@ ipv4_udp_vrf()
 		log_test_addr ${a} $? 1 "Global server"
 
 		log_start
-		run_cmd nettest -D -d ${VRF} -s -2 ${NSA_DEV} &
+		run_cmd nettest -D -I ${VRF} -s -3 ${NSA_DEV} &
 		sleep 1
 		run_cmd_nsb nettest -D -r ${a}
 		log_test_addr ${a} $? 0 "VRF server"
 
 		log_start
-		run_cmd nettest -D -d ${NSA_DEV} -s -2 ${NSA_DEV} &
+		run_cmd nettest -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
 		sleep 1
 		run_cmd_nsb nettest -D -r ${a}
 		log_test_addr ${a} $? 0 "Enslaved device server"
@@ -1461,26 +1740,26 @@ ipv4_udp_vrf()
 
 	a=${NSA_IP}
 	log_start
-	run_cmd nettest -s -D -d ${VRF} -2 ${NSA_DEV} &
+	run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} &
 	sleep 1
 	run_cmd nettest -D -d ${VRF} -r ${a}
 	log_test_addr ${a} $? 0 "VRF server, VRF client, local conn"
 
 	log_start
-	run_cmd nettest -s -D -d ${VRF} -2 ${NSA_DEV} &
+	run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} &
 	sleep 1
 	run_cmd nettest -D -d ${NSA_DEV} -r ${a}
 	log_test_addr ${a} $? 0 "VRF server, enslaved device client, local connection"
 
 	a=${NSA_IP}
 	log_start
-	run_cmd nettest -s -D -d ${NSA_DEV} -2 ${NSA_DEV} &
+	run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} &
 	sleep 1
 	run_cmd nettest -D -d ${VRF} -r ${a}
 	log_test_addr ${a} $? 0 "Enslaved device server, VRF client, local conn"
 
 	log_start
-	run_cmd nettest -s -D -d ${NSA_DEV} -2 ${NSA_DEV} &
+	run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} &
 	sleep 1
 	run_cmd nettest -D -d ${NSA_DEV} -r ${a}
 	log_test_addr ${a} $? 0 "Enslaved device server, device client, local conn"
@@ -1495,19 +1774,19 @@ ipv4_udp_vrf()
 	for a in ${NSA_IP} ${VRF_IP}
 	do
 		log_start
-		run_cmd nettest -D -s -2 ${NSA_DEV} &
+		run_cmd nettest -D -s -3 ${NSA_DEV} &
 		sleep 1
 		run_cmd_nsb nettest -D -r ${a}
 		log_test_addr ${a} $? 0 "Global server"
 
 		log_start
-		run_cmd nettest -D -d ${VRF} -s -2 ${NSA_DEV} &
+		run_cmd nettest -D -I ${VRF} -s -3 ${NSA_DEV} &
 		sleep 1
 		run_cmd_nsb nettest -D -r ${a}
 		log_test_addr ${a} $? 0 "VRF server"
 
 		log_start
-		run_cmd nettest -D -d ${NSA_DEV} -s -2 ${NSA_DEV} &
+		run_cmd nettest -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
 		sleep 1
 		run_cmd_nsb nettest -D -r ${a}
 		log_test_addr ${a} $? 0 "Enslaved device server"
@@ -1549,31 +1828,31 @@ ipv4_udp_vrf()
 	#
 	a=${NSA_IP}
 	log_start
-	run_cmd nettest -D -s -2 ${NSA_DEV} &
+	run_cmd nettest -D -s -3 ${NSA_DEV} &
 	sleep 1
 	run_cmd nettest -D -d ${VRF} -r ${a}
 	log_test_addr ${a} $? 0 "Global server, VRF client, local conn"
 
 	log_start
-	run_cmd nettest -s -D -d ${VRF} -2 ${NSA_DEV} &
+	run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} &
 	sleep 1
 	run_cmd nettest -D -d ${VRF} -r ${a}
 	log_test_addr ${a} $? 0 "VRF server, VRF client, local conn"
 
 	log_start
-	run_cmd nettest -s -D -d ${VRF} -2 ${NSA_DEV} &
+	run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} &
 	sleep 1
 	run_cmd nettest -D -d ${NSA_DEV} -r ${a}
 	log_test_addr ${a} $? 0 "VRF server, device client, local conn"
 
 	log_start
-	run_cmd nettest -s -D -d ${NSA_DEV} -2 ${NSA_DEV} &
+	run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} &
 	sleep 1
 	run_cmd nettest -D -d ${VRF} -r ${a}
 	log_test_addr ${a} $? 0 "Enslaved device server, VRF client, local conn"
 
 	log_start
-	run_cmd nettest -s -D -d ${NSA_DEV} -2 ${NSA_DEV} &
+	run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} &
 	sleep 1
 	run_cmd nettest -D -d ${NSA_DEV} -r ${a}
 	log_test_addr ${a} $? 0 "Enslaved device server, device client, local conn"
@@ -1581,7 +1860,7 @@ ipv4_udp_vrf()
 	for a in ${VRF_IP} 127.0.0.1
 	do
 		log_start
-		run_cmd nettest -D -s -2 ${VRF} &
+		run_cmd nettest -D -s -3 ${VRF} &
 		sleep 1
 		run_cmd nettest -D -d ${VRF} -r ${a}
 		log_test_addr ${a} $? 0 "Global server, VRF client, local conn"
@@ -1590,7 +1869,7 @@ ipv4_udp_vrf()
 	for a in ${VRF_IP} 127.0.0.1
 	do
 		log_start
-		run_cmd nettest -s -D -d ${VRF} -2 ${VRF} &
+		run_cmd nettest -s -D -I ${VRF} -3 ${VRF} &
 		sleep 1
 		run_cmd nettest -D -d ${VRF} -r ${a}
 		log_test_addr ${a} $? 0 "VRF server, VRF client, local conn"
@@ -1645,20 +1924,49 @@ ipv4_addr_bind_novrf()
 		log_test_addr ${a} $? 0 "Raw socket bind to local address"
 
 		log_start
-		run_cmd nettest -s -R -P icmp -l ${a} -d ${NSA_DEV} -b
+		run_cmd nettest -s -R -P icmp -l ${a} -I ${NSA_DEV} -b
 		log_test_addr ${a} $? 0 "Raw socket bind to local address after device bind"
 	done
 
 	#
+	# tests for nonlocal bind
+	#
+	a=${NL_IP}
+	log_start
+	run_cmd nettest -s -R -f -l ${a} -b
+	log_test_addr ${a} $? 0 "Raw socket bind to nonlocal address"
+
+	log_start
+	run_cmd nettest -s -f -l ${a} -b
+	log_test_addr ${a} $? 0 "TCP socket bind to nonlocal address"
+
+	log_start
+	run_cmd nettest -s -D -P icmp -f -l ${a} -b
+	log_test_addr ${a} $? 0 "ICMP socket bind to nonlocal address"
+
+	#
+	# check that ICMP sockets cannot bind to broadcast and multicast addresses
+	#
+	a=${BCAST_IP}
+	log_start
+	run_cmd nettest -s -D -P icmp -l ${a} -b
+	log_test_addr ${a} $? 1 "ICMP socket bind to broadcast address"
+
+	a=${MCAST_IP}
+	log_start
+	run_cmd nettest -s -D -P icmp -l ${a} -b
+	log_test_addr ${a} $? 1 "ICMP socket bind to multicast address"
+
+	#
 	# tcp sockets
 	#
 	a=${NSA_IP}
 	log_start
-	run_cmd nettest -l ${a} -r ${NSB_IP} -t1 -b
+	run_cmd nettest -c ${a} -r ${NSB_IP} -t1 -b
 	log_test_addr ${a} $? 0 "TCP socket bind to local address"
 
 	log_start
-	run_cmd nettest -l ${a} -r ${NSB_IP} -d ${NSA_DEV} -t1 -b
+	run_cmd nettest -c ${a} -r ${NSB_IP} -d ${NSA_DEV} -t1 -b
 	log_test_addr ${a} $? 0 "TCP socket bind to local address after device bind"
 
 	# Sadly, the kernel allows binding a socket to a device and then
@@ -1668,7 +1976,7 @@ ipv4_addr_bind_novrf()
 	#a=${NSA_LO_IP}
 	#log_start
 	#show_hint "Should fail with 'Cannot assign requested address'"
-	#run_cmd nettest -s -l ${a} -d ${NSA_DEV} -t1 -b
+	#run_cmd nettest -s -l ${a} -I ${NSA_DEV} -t1 -b
 	#log_test_addr ${a} $? 1 "TCP socket bind to out of scope local address"
 }
 
@@ -1680,46 +1988,76 @@ ipv4_addr_bind_vrf()
 	for a in ${NSA_IP} ${VRF_IP}
 	do
 		log_start
+		show_hint "Socket not bound to VRF, but address is in VRF"
 		run_cmd nettest -s -R -P icmp -l ${a} -b
-		log_test_addr ${a} $? 0 "Raw socket bind to local address"
+		log_test_addr ${a} $? 1 "Raw socket bind to local address"
 
 		log_start
-		run_cmd nettest -s -R -P icmp -l ${a} -d ${NSA_DEV} -b
+		run_cmd nettest -s -R -P icmp -l ${a} -I ${NSA_DEV} -b
 		log_test_addr ${a} $? 0 "Raw socket bind to local address after device bind"
 		log_start
-		run_cmd nettest -s -R -P icmp -l ${a} -d ${VRF} -b
+		run_cmd nettest -s -R -P icmp -l ${a} -I ${VRF} -b
 		log_test_addr ${a} $? 0 "Raw socket bind to local address after VRF bind"
 	done
 
 	a=${NSA_LO_IP}
 	log_start
 	show_hint "Address on loopback is out of VRF scope"
-	run_cmd nettest -s -R -P icmp -l ${a} -d ${VRF} -b
+	run_cmd nettest -s -R -P icmp -l ${a} -I ${VRF} -b
 	log_test_addr ${a} $? 1 "Raw socket bind to out of scope address after VRF bind"
 
 	#
+	# tests for nonlocal bind
+	#
+	a=${NL_IP}
+	log_start
+	run_cmd nettest -s -R -f -l ${a} -I ${VRF} -b
+	log_test_addr ${a} $? 0 "Raw socket bind to nonlocal address after VRF bind"
+
+	log_start
+	run_cmd nettest -s -f -l ${a} -I ${VRF} -b
+	log_test_addr ${a} $? 0 "TCP socket bind to nonlocal address after VRF bind"
+
+	log_start
+	run_cmd nettest -s -D -P icmp -f -l ${a} -I ${VRF} -b
+	log_test_addr ${a} $? 0 "ICMP socket bind to nonlocal address after VRF bind"
+
+	#
+	# check that ICMP sockets cannot bind to broadcast and multicast addresses
+	#
+	a=${BCAST_IP}
+	log_start
+	run_cmd nettest -s -D -P icmp -l ${a} -I ${VRF} -b
+	log_test_addr ${a} $? 1 "ICMP socket bind to broadcast address after VRF bind"
+
+	a=${MCAST_IP}
+	log_start
+	run_cmd nettest -s -D -P icmp -l ${a} -I ${VRF} -b
+	log_test_addr ${a} $? 1 "ICMP socket bind to multicast address after VRF bind"
+
+	#
 	# tcp sockets
 	#
 	for a in ${NSA_IP} ${VRF_IP}
 	do
 		log_start
-		run_cmd nettest -s -l ${a} -d ${VRF} -t1 -b
+		run_cmd nettest -s -l ${a} -I ${VRF} -t1 -b
 		log_test_addr ${a} $? 0 "TCP socket bind to local address"
 
 		log_start
-		run_cmd nettest -s -l ${a} -d ${NSA_DEV} -t1 -b
+		run_cmd nettest -s -l ${a} -I ${NSA_DEV} -t1 -b
 		log_test_addr ${a} $? 0 "TCP socket bind to local address after device bind"
 	done
 
 	a=${NSA_LO_IP}
 	log_start
 	show_hint "Address on loopback out of scope for VRF"
-	run_cmd nettest -s -l ${a} -d ${VRF} -t1 -b
+	run_cmd nettest -s -l ${a} -I ${VRF} -t1 -b
 	log_test_addr ${a} $? 1 "TCP socket bind to invalid local address for VRF"
 
 	log_start
 	show_hint "Address on loopback out of scope for device in VRF"
-	run_cmd nettest -s -l ${a} -d ${NSA_DEV} -t1 -b
+	run_cmd nettest -s -l ${a} -I ${NSA_DEV} -t1 -b
 	log_test_addr ${a} $? 1 "TCP socket bind to invalid local address for device bind"
 }
 
@@ -1729,10 +2067,12 @@ ipv4_addr_bind()
 
 	log_subsection "No VRF"
 	setup
+	set_ping_group
 	ipv4_addr_bind_novrf
 
 	log_subsection "With VRF"
 	setup "yes"
+	set_ping_group
 	ipv4_addr_bind_vrf
 }
 
@@ -1766,7 +2106,7 @@ ipv4_rt()
 	for a in ${NSA_IP} ${VRF_IP}
 	do
 		log_start
-		run_cmd nettest ${varg} -s -d ${VRF} &
+		run_cmd nettest ${varg} -s -I ${VRF} &
 		sleep 1
 		run_cmd_nsb nettest ${varg} -r ${a} &
 		sleep 3
@@ -1779,7 +2119,7 @@ ipv4_rt()
 
 	a=${NSA_IP}
 	log_start
-	run_cmd nettest ${varg} -s -d ${NSA_DEV} &
+	run_cmd nettest ${varg} -s -I ${NSA_DEV} &
 	sleep 1
 	run_cmd_nsb nettest ${varg} -r ${a} &
 	sleep 3
@@ -1834,7 +2174,7 @@ ipv4_rt()
 	for a in ${NSA_IP} ${VRF_IP}
 	do
 		log_start
-		run_cmd nettest ${varg} -d ${VRF} -s &
+		run_cmd nettest ${varg} -I ${VRF} -s &
 		sleep 1
 		run_cmd nettest ${varg} -d ${VRF} -r ${a} &
 		sleep 3
@@ -1847,6 +2187,7 @@ ipv4_rt()
 
 	a=${NSA_IP}
 	log_start
+
 	run_cmd nettest ${varg} -s &
 	sleep 1
 	run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} &
@@ -1858,7 +2199,7 @@ ipv4_rt()
 	setup ${with_vrf}
 
 	log_start
-	run_cmd nettest ${varg} -d ${VRF} -s &
+	run_cmd nettest ${varg} -I ${VRF} -s &
 	sleep 1
 	run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} &
 	sleep 3
@@ -1869,7 +2210,7 @@ ipv4_rt()
 	setup ${with_vrf}
 
 	log_start
-	run_cmd nettest ${varg} -d ${NSA_DEV} -s &
+	run_cmd nettest ${varg} -I ${NSA_DEV} -s &
 	sleep 1
 	run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} &
 	sleep 3
@@ -2073,7 +2414,7 @@ ipv6_ping_vrf()
 		log_start
 		show_hint "Fails since VRF device does not support linklocal or multicast"
 		run_cmd ${ping6} -c1 -w1 ${a}
-		log_test_addr ${a} $? 2 "ping out, VRF bind"
+		log_test_addr ${a} $? 1 "ping out, VRF bind"
 	done
 
 	for a in ${NSB_IP6} ${NSB_LO_IP6} ${NSB_LINKIP6}%${NSA_DEV} ${MCAST}%${NSA_DEV}
@@ -2193,10 +2534,16 @@ ipv6_ping()
 	log_subsection "No VRF"
 	setup
 	ipv6_ping_novrf
+	setup
+	set_ping_group
+	ipv6_ping_novrf
 
 	log_subsection "With VRF"
 	setup "yes"
 	ipv6_ping_vrf
+	setup "yes"
+	set_ping_group
+	ipv6_ping_vrf
 }
 
 ################################################################################
@@ -2213,9 +2560,9 @@ ipv6_tcp_md5_novrf()
 
 	# basic use case
 	log_start
-	run_cmd nettest -6 -s -M ${MD5_PW} -r ${NSB_IP6} &
+	run_cmd nettest -6 -s -M ${MD5_PW} -m ${NSB_IP6} &
 	sleep 1
-	run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW}
+	run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
 	log_test $? 0 "MD5: Single address config"
 
 	# client sends MD5, server not configured
@@ -2223,23 +2570,23 @@ ipv6_tcp_md5_novrf()
 	show_hint "Should timeout due to MD5 mismatch"
 	run_cmd nettest -6 -s &
 	sleep 1
-	run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW}
+	run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
 	log_test $? 2 "MD5: Server no config, client uses password"
 
 	# wrong password
 	log_start
 	show_hint "Should timeout since client uses wrong password"
-	run_cmd nettest -6 -s -M ${MD5_PW} -r ${NSB_IP6} &
+	run_cmd nettest -6 -s -M ${MD5_PW} -m ${NSB_IP6} &
 	sleep 1
-	run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW}
+	run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW}
 	log_test $? 2 "MD5: Client uses wrong password"
 
 	# client from different address
 	log_start
 	show_hint "Should timeout due to MD5 mismatch"
-	run_cmd nettest -6 -s -M ${MD5_PW} -r ${NSB_LO_IP6} &
+	run_cmd nettest -6 -s -M ${MD5_PW} -m ${NSB_LO_IP6} &
 	sleep 1
-	run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW}
+	run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
 	log_test $? 2 "MD5: Client address does not match address configured with password"
 
 	#
@@ -2250,7 +2597,7 @@ ipv6_tcp_md5_novrf()
 	log_start
 	run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} &
 	sleep 1
-	run_cmd_nsb nettest -6  -r ${NSA_IP6} -M ${MD5_PW}
+	run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
 	log_test $? 0 "MD5: Prefix config"
 
 	# client in prefix, wrong password
@@ -2258,7 +2605,7 @@ ipv6_tcp_md5_novrf()
 	show_hint "Should timeout since client uses wrong password"
 	run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} &
 	sleep 1
-	run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW}
+	run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW}
 	log_test $? 2 "MD5: Prefix config, client uses wrong password"
 
 	# client outside of prefix
@@ -2266,7 +2613,7 @@ ipv6_tcp_md5_novrf()
 	show_hint "Should timeout due to MD5 mismatch"
 	run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} &
 	sleep 1
-	run_cmd_nsb nettest -6 -l ${NSB_LO_IP6} -r ${NSA_IP6} -M ${MD5_PW}
+	run_cmd_nsb nettest -6 -c ${NSB_LO_IP6} -r ${NSA_IP6} -X ${MD5_PW}
 	log_test $? 2 "MD5: Prefix config, client address not in configured prefix"
 }
 
@@ -2281,33 +2628,33 @@ ipv6_tcp_md5()
 
 	# basic use case
 	log_start
-	run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} &
+	run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} &
 	sleep 1
-	run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW}
+	run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
 	log_test $? 0 "MD5: VRF: Single address config"
 
 	# client sends MD5, server not configured
 	log_start
 	show_hint "Should timeout since server does not have MD5 auth"
-	run_cmd nettest -6 -s -d ${VRF} &
+	run_cmd nettest -6 -s -I ${VRF} &
 	sleep 1
-	run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW}
+	run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
 	log_test $? 2 "MD5: VRF: Server no config, client uses password"
 
 	# wrong password
 	log_start
 	show_hint "Should timeout since client uses wrong password"
-	run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} &
+	run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} &
 	sleep 1
-	run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW}
+	run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW}
 	log_test $? 2 "MD5: VRF: Client uses wrong password"
 
 	# client from different address
 	log_start
 	show_hint "Should timeout since server config differs from client"
-	run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_LO_IP6} &
+	run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_LO_IP6} &
 	sleep 1
-	run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW}
+	run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
 	log_test $? 2 "MD5: VRF: Client address does not match address configured with password"
 
 	#
@@ -2316,25 +2663,25 @@ ipv6_tcp_md5()
 
 	# client in prefix
 	log_start
-	run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
+	run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
 	sleep 1
-	run_cmd_nsb nettest -6  -r ${NSA_IP6} -M ${MD5_PW}
+	run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
 	log_test $? 0 "MD5: VRF: Prefix config"
 
 	# client in prefix, wrong password
 	log_start
 	show_hint "Should timeout since client uses wrong password"
-	run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
+	run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
 	sleep 1
-	run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW}
+	run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW}
 	log_test $? 2 "MD5: VRF: Prefix config, client uses wrong password"
 
 	# client outside of prefix
 	log_start
 	show_hint "Should timeout since client address is outside of prefix"
-	run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
+	run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
 	sleep 1
-	run_cmd_nsb nettest -6 -l ${NSB_LO_IP6} -r ${NSA_IP6} -M ${MD5_PW}
+	run_cmd_nsb nettest -6 -c ${NSB_LO_IP6} -r ${NSA_IP6} -X ${MD5_PW}
 	log_test $? 2 "MD5: VRF: Prefix config, client address not in configured prefix"
 
 	#
@@ -2342,74 +2689,74 @@ ipv6_tcp_md5()
 	#
 
 	log_start
-	run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} &
-	run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -r ${NSB_IP6} &
+	run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} &
+	run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} &
 	sleep 1
-	run_cmd_nsb nettest -6  -r ${NSA_IP6} -M ${MD5_PW}
+	run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
 	log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF"
 
 	log_start
-	run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} &
-	run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -r ${NSB_IP6} &
+	run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} &
+	run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} &
 	sleep 1
-	run_cmd_nsc nettest -6  -r ${NSA_IP6} -M ${MD5_WRONG_PW}
+	run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW}
 	log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF"
 
 	log_start
 	show_hint "Should timeout since client in default VRF uses VRF password"
-	run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} &
-	run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -r ${NSB_IP6} &
+	run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} &
+	run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} &
 	sleep 1
-	run_cmd_nsc nettest -6 -r ${NSA_IP6} -M ${MD5_PW}
+	run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
 	log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF with VRF pw"
 
 	log_start
 	show_hint "Should timeout since client in VRF uses default VRF password"
-	run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} &
-	run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -r ${NSB_IP6} &
+	run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} &
+	run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} &
 	sleep 1
-	run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW}
+	run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW}
 	log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF with default VRF pw"
 
 	log_start
-	run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
+	run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
 	run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} &
 	sleep 1
-	run_cmd_nsb nettest -6  -r ${NSA_IP6} -M ${MD5_PW}
+	run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
 	log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF"
 
 	log_start
-	run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
+	run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
 	run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} &
 	sleep 1
-	run_cmd_nsc nettest -6  -r ${NSA_IP6} -M ${MD5_WRONG_PW}
+	run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW}
 	log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF"
 
 	log_start
 	show_hint "Should timeout since client in default VRF uses VRF password"
-	run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
+	run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
 	run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} &
 	sleep 1
-	run_cmd_nsc nettest -6 -r ${NSA_IP6} -M ${MD5_PW}
+	run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
 	log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF with VRF pw"
 
 	log_start
 	show_hint "Should timeout since client in VRF uses default VRF password"
-	run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
+	run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
 	run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} &
 	sleep 1
-	run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW}
+	run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW}
 	log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF with default VRF pw"
 
 	#
 	# negative tests
 	#
 	log_start
-	run_cmd nettest -6 -s -d ${NSA_DEV} -M ${MD5_PW} -r ${NSB_IP6}
+	run_cmd nettest -6 -s -I ${NSA_DEV} -M ${MD5_PW} -m ${NSB_IP6}
 	log_test $? 1 "MD5: VRF: Device must be a VRF - single address"
 
 	log_start
-	run_cmd nettest -6 -s -d ${NSA_DEV} -M ${MD5_PW} -m ${NS_NET6}
+	run_cmd nettest -6 -s -I ${NSA_DEV} -M ${MD5_PW} -m ${NS_NET6}
 	log_test $? 1 "MD5: VRF: Device must be a VRF - prefix"
 
 }
@@ -2482,7 +2829,7 @@ ipv6_tcp_novrf()
 
 	a=${NSA_IP6}
 	log_start
-	run_cmd nettest -6 -s -d ${NSA_DEV} -2 ${NSA_DEV} &
+	run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} &
 	sleep 1
 	run_cmd nettest -6 -r ${a} -0 ${a}
 	log_test_addr ${a} $? 0 "Device server, unbound client, local connection"
@@ -2491,7 +2838,7 @@ ipv6_tcp_novrf()
 	do
 		log_start
 		show_hint "Should fail 'Connection refused' since addresses on loopback are out of device scope"
-		run_cmd nettest -6 -s -d ${NSA_DEV} &
+		run_cmd nettest -6 -s -I ${NSA_DEV} &
 		sleep 1
 		run_cmd nettest -6 -r ${a}
 		log_test_addr ${a} $? 1 "Device server, unbound client, local connection"
@@ -2517,7 +2864,7 @@ ipv6_tcp_novrf()
 	for a in ${NSA_IP6} ${NSA_LINKIP6}
 	do
 		log_start
-		run_cmd nettest -6 -s -d ${NSA_DEV} -2 ${NSA_DEV} &
+		run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} &
 		sleep 1
 		run_cmd nettest -6  -d ${NSA_DEV} -r ${a}
 		log_test_addr ${a} $? 0 "Device server, device client, local conn"
@@ -2531,7 +2878,7 @@ ipv6_tcp_novrf()
 		log_test_addr ${a} $? 1 "No server, device client, local conn"
 	done
 
-	ipv6_tcp_md5_novrf
+	[ "$fips_enabled" = "1" ] || ipv6_tcp_md5_novrf
 }
 
 ipv6_tcp_vrf()
@@ -2559,7 +2906,7 @@ ipv6_tcp_vrf()
 	for a in ${NSA_IP6} ${VRF_IP6}
 	do
 		log_start
-		run_cmd nettest -6 -s -d ${VRF} -2 ${VRF} &
+		run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} &
 		sleep 1
 		run_cmd_nsb nettest -6 -r ${a}
 		log_test_addr ${a} $? 0 "VRF server"
@@ -2568,7 +2915,7 @@ ipv6_tcp_vrf()
 	# link local is always bound to ingress device
 	a=${NSA_LINKIP6}%${NSB_DEV}
 	log_start
-	run_cmd nettest -6 -s -d ${VRF} -2 ${NSA_DEV} &
+	run_cmd nettest -6 -s -I ${VRF} -3 ${NSA_DEV} &
 	sleep 1
 	run_cmd_nsb nettest -6 -r ${a}
 	log_test_addr ${a} $? 0 "VRF server"
@@ -2576,7 +2923,7 @@ ipv6_tcp_vrf()
 	for a in ${NSA_IP6} ${VRF_IP6} ${NSA_LINKIP6}%${NSB_DEV}
 	do
 		log_start
-		run_cmd nettest -6 -s -d ${NSA_DEV} -2 ${NSA_DEV} &
+		run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} &
 		sleep 1
 		run_cmd_nsb nettest -6 -r ${a}
 		log_test_addr ${a} $? 0 "Device server"
@@ -2601,7 +2948,11 @@ ipv6_tcp_vrf()
 	log_test_addr ${a} $? 1 "Global server, local connection"
 
 	# run MD5 tests
-	ipv6_tcp_md5
+	if [ "$fips_enabled" = "0" ]; then
+		setup_vrf_dup
+		ipv6_tcp_md5
+		cleanup_vrf_dup
+	fi
 
 	#
 	# enable VRF global server
@@ -2612,7 +2963,7 @@ ipv6_tcp_vrf()
 	for a in ${NSA_IP6} ${VRF_IP6}
 	do
 		log_start
-		run_cmd nettest -6 -s -2 ${VRF} &
+		run_cmd nettest -6 -s -3 ${VRF} &
 		sleep 1
 		run_cmd_nsb nettest -6 -r ${a}
 		log_test_addr ${a} $? 0 "Global server"
@@ -2621,7 +2972,7 @@ ipv6_tcp_vrf()
 	for a in ${NSA_IP6} ${VRF_IP6}
 	do
 		log_start
-		run_cmd nettest -6 -s -d ${VRF} -2 ${VRF} &
+		run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} &
 		sleep 1
 		run_cmd_nsb nettest -6 -r ${a}
 		log_test_addr ${a} $? 0 "VRF server"
@@ -2630,13 +2981,13 @@ ipv6_tcp_vrf()
 	# For LLA, child socket is bound to device
 	a=${NSA_LINKIP6}%${NSB_DEV}
 	log_start
-	run_cmd nettest -6 -s -2 ${NSA_DEV} &
+	run_cmd nettest -6 -s -3 ${NSA_DEV} &
 	sleep 1
 	run_cmd_nsb nettest -6 -r ${a}
 	log_test_addr ${a} $? 0 "Global server"
 
 	log_start
-	run_cmd nettest -6 -s -d ${VRF} -2 ${NSA_DEV} &
+	run_cmd nettest -6 -s -I ${VRF} -3 ${NSA_DEV} &
 	sleep 1
 	run_cmd_nsb nettest -6 -r ${a}
 	log_test_addr ${a} $? 0 "VRF server"
@@ -2644,7 +2995,7 @@ ipv6_tcp_vrf()
 	for a in ${NSA_IP6} ${NSA_LINKIP6}%${NSB_DEV}
 	do
 		log_start
-		run_cmd nettest -6 -s -d ${NSA_DEV} -2 ${NSA_DEV} &
+		run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} &
 		sleep 1
 		run_cmd_nsb nettest -6 -r ${a}
 		log_test_addr ${a} $? 0 "Device server"
@@ -2664,7 +3015,7 @@ ipv6_tcp_vrf()
 	do
 		log_start
 		show_hint "Fails 'Connection refused' since client is not in VRF"
-		run_cmd nettest -6 -s -d ${VRF} &
+		run_cmd nettest -6 -s -I ${VRF} &
 		sleep 1
 		run_cmd nettest -6 -r ${a}
 		log_test_addr ${a} $? 1 "Global server, local connection"
@@ -2719,7 +3070,7 @@ ipv6_tcp_vrf()
 	for a in ${NSA_IP6} ${VRF_IP6} ::1
 	do
 		log_start
-		run_cmd nettest -6 -s -d ${VRF} -2 ${VRF} &
+		run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} &
 		sleep 1
 		run_cmd nettest -6 -r ${a} -d ${VRF} -0 ${a}
 		log_test_addr ${a} $? 0 "VRF server, VRF client, local connection"
@@ -2727,7 +3078,7 @@ ipv6_tcp_vrf()
 
 	a=${NSA_IP6}
 	log_start
-	run_cmd nettest -6 -s -d ${VRF} -2 ${VRF} &
+	run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} &
 	sleep 1
 	run_cmd nettest -6 -r ${a} -d ${NSA_DEV} -0 ${a}
 	log_test_addr ${a} $? 0 "VRF server, device client, local connection"
@@ -2735,13 +3086,13 @@ ipv6_tcp_vrf()
 	a=${NSA_IP6}
 	log_start
 	show_hint "Should fail since unbound client is out of VRF scope"
-	run_cmd nettest -6 -s -d ${VRF} &
+	run_cmd nettest -6 -s -I ${VRF} &
 	sleep 1
 	run_cmd nettest -6 -r ${a}
 	log_test_addr ${a} $? 1 "VRF server, unbound client, local connection"
 
 	log_start
-	run_cmd nettest -6 -s -d ${NSA_DEV} -2 ${NSA_DEV} &
+	run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} &
 	sleep 1
 	run_cmd nettest -6 -r ${a} -d ${VRF} -0 ${a}
 	log_test_addr ${a} $? 0 "Device server, VRF client, local connection"
@@ -2749,7 +3100,7 @@ ipv6_tcp_vrf()
 	for a in ${NSA_IP6} ${NSA_LINKIP6}
 	do
 		log_start
-		run_cmd nettest -6 -s -d ${NSA_DEV} -2 ${NSA_DEV} &
+		run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} &
 		sleep 1
 		run_cmd nettest -6 -r ${a} -d ${NSA_DEV} -0 ${a}
 		log_test_addr ${a} $? 0 "Device server, device client, local connection"
@@ -2789,13 +3140,13 @@ ipv6_udp_novrf()
 	for a in ${NSA_IP6} ${NSA_LINKIP6}%${NSB_DEV}
 	do
 		log_start
-		run_cmd nettest -6 -D -s -2 ${NSA_DEV} &
+		run_cmd nettest -6 -D -s -3 ${NSA_DEV} &
 		sleep 1
 		run_cmd_nsb nettest -6 -D -r ${a}
 		log_test_addr ${a} $? 0 "Global server"
 
 		log_start
-		run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} &
+		run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
 		sleep 1
 		run_cmd_nsb nettest -6 -D -r ${a}
 		log_test_addr ${a} $? 0 "Device server"
@@ -2803,7 +3154,7 @@ ipv6_udp_novrf()
 
 	a=${NSA_LO_IP6}
 	log_start
-	run_cmd nettest -6 -D -s -2 ${NSA_DEV} &
+	run_cmd nettest -6 -D -s -3 ${NSA_DEV} &
 	sleep 1
 	run_cmd_nsb nettest -6 -D -r ${a}
 	log_test_addr ${a} $? 0 "Global server"
@@ -2813,7 +3164,7 @@ ipv6_udp_novrf()
 	# behavior.
 	#log_start
 	#show_hint "Should fail since loopback address is out of scope"
-	#run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} &
+	#run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
 	#sleep 1
 	#run_cmd_nsb nettest -6 -D -r ${a}
 	#log_test_addr ${a} $? 1 "Device server"
@@ -2881,7 +3232,7 @@ ipv6_udp_novrf()
 
 	a=${NSA_IP6}
 	log_start
-	run_cmd nettest -6 -s -D -d ${NSA_DEV} -2 ${NSA_DEV} &
+	run_cmd nettest -6 -s -D -I ${NSA_DEV} -3 ${NSA_DEV} &
 	sleep 1
 	run_cmd nettest -6 -D -r ${a}
 	log_test_addr ${a} $? 0 "Device server, unbound client, local connection"
@@ -2890,7 +3241,7 @@ ipv6_udp_novrf()
 	do
 		log_start
 		show_hint "Should fail 'Connection refused' since address is out of device scope"
-		run_cmd nettest -6 -s -D -d ${NSA_DEV} &
+		run_cmd nettest -6 -s -D -I ${NSA_DEV} &
 		sleep 1
 		run_cmd nettest -6 -D -r ${a}
 		log_test_addr ${a} $? 1 "Device server, local connection"
@@ -2937,11 +3288,18 @@ ipv6_udp_novrf()
 		sleep 1
 		run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -S
 		log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection"
+
+		log_start
+		show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope"
+		run_cmd nettest -6 -D -s &
+		sleep 1
+		run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -S -U
+		log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection, with connect()"
 	done
 
 	a=${NSA_IP6}
 	log_start
-	run_cmd nettest -6 -D -s -d ${NSA_DEV} -2 ${NSA_DEV} &
+	run_cmd nettest -6 -D -s -I ${NSA_DEV} -3 ${NSA_DEV} &
 	sleep 1
 	run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} -0 ${a}
 	log_test_addr ${a} $? 0 "Device server, device client, local conn"
@@ -2988,7 +3346,7 @@ ipv6_udp_vrf()
 	for a in ${NSA_IP6} ${VRF_IP6}
 	do
 		log_start
-		run_cmd nettest -6 -D -d ${VRF} -s -2 ${NSA_DEV} &
+		run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} &
 		sleep 1
 		run_cmd_nsb nettest -6 -D -r ${a}
 		log_test_addr ${a} $? 0 "VRF server"
@@ -2997,7 +3355,7 @@ ipv6_udp_vrf()
 	for a in ${NSA_IP6} ${VRF_IP6}
 	do
 		log_start
-		run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} &
+		run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
 		sleep 1
 		run_cmd_nsb nettest -6 -D -r ${a}
 		log_test_addr ${a} $? 0 "Enslaved device server"
@@ -3028,7 +3386,7 @@ ipv6_udp_vrf()
 	for a in ${NSA_IP6} ${VRF_IP6}
 	do
 		log_start
-		run_cmd nettest -6 -D -d ${VRF} -s &
+		run_cmd nettest -6 -D -I ${VRF} -s &
 		sleep 1
 		run_cmd nettest -6 -D -d ${VRF} -r ${a}
 		log_test_addr ${a} $? 0 "VRF server, VRF client, local conn"
@@ -3043,19 +3401,19 @@ ipv6_udp_vrf()
 	log_test_addr ${a} $? 1 "Global server, device client, local conn"
 
 	log_start
-	run_cmd nettest -6 -D -d ${VRF} -s -2 ${NSA_DEV} &
+	run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} &
 	sleep 1
 	run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a}
 	log_test_addr ${a} $? 0 "VRF server, device client, local conn"
 
 	log_start
-	run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} &
+	run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
 	sleep 1
 	run_cmd nettest -6 -D -d ${VRF} -r ${a}
 	log_test_addr ${a} $? 0 "Enslaved device server, VRF client, local conn"
 
 	log_start
-	run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} &
+	run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
 	sleep 1
 	run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a}
 	log_test_addr ${a} $? 0 "Enslaved device server, device client, local conn"
@@ -3070,7 +3428,7 @@ ipv6_udp_vrf()
 	for a in ${NSA_IP6} ${VRF_IP6}
 	do
 		log_start
-		run_cmd nettest -6 -D -s -2 ${NSA_DEV} &
+		run_cmd nettest -6 -D -s -3 ${NSA_DEV} &
 		sleep 1
 		run_cmd_nsb nettest -6 -D -r ${a}
 		log_test_addr ${a} $? 0 "Global server"
@@ -3079,7 +3437,7 @@ ipv6_udp_vrf()
 	for a in ${NSA_IP6} ${VRF_IP6}
 	do
 		log_start
-		run_cmd nettest -6 -D -d ${VRF} -s -2 ${NSA_DEV} &
+		run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} &
 		sleep 1
 		run_cmd_nsb nettest -6 -D -r ${a}
 		log_test_addr ${a} $? 0 "VRF server"
@@ -3088,7 +3446,7 @@ ipv6_udp_vrf()
 	for a in ${NSA_IP6} ${VRF_IP6}
 	do
 		log_start
-		run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} &
+		run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
 		sleep 1
 		run_cmd_nsb nettest -6 -D -r ${a}
 		log_test_addr ${a} $? 0 "Enslaved device server"
@@ -3132,13 +3490,13 @@ ipv6_udp_vrf()
 	#
 	a=${NSA_IP6}
 	log_start
-	run_cmd nettest -6 -D -s -2 ${NSA_DEV} &
+	run_cmd nettest -6 -D -s -3 ${NSA_DEV} &
 	sleep 1
 	run_cmd nettest -6 -D -d ${VRF} -r ${a}
 	log_test_addr ${a} $? 0 "Global server, VRF client, local conn"
 
 	#log_start
-	run_cmd nettest -6 -D -d ${VRF} -s -2 ${NSA_DEV} &
+	run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} &
 	sleep 1
 	run_cmd nettest -6 -D -d ${VRF} -r ${a}
 	log_test_addr ${a} $? 0 "VRF server, VRF client, local conn"
@@ -3146,13 +3504,13 @@ ipv6_udp_vrf()
 
 	a=${VRF_IP6}
 	log_start
-	run_cmd nettest -6 -D -s -2 ${VRF} &
+	run_cmd nettest -6 -D -s -3 ${VRF} &
 	sleep 1
 	run_cmd nettest -6 -D -d ${VRF} -r ${a}
 	log_test_addr ${a} $? 0 "Global server, VRF client, local conn"
 
 	log_start
-	run_cmd nettest -6 -D -d ${VRF} -s -2 ${VRF} &
+	run_cmd nettest -6 -D -I ${VRF} -s -3 ${VRF} &
 	sleep 1
 	run_cmd nettest -6 -D -d ${VRF} -r ${a}
 	log_test_addr ${a} $? 0 "VRF server, VRF client, local conn"
@@ -3168,25 +3526,25 @@ ipv6_udp_vrf()
 	# device to global IP
 	a=${NSA_IP6}
 	log_start
-	run_cmd nettest -6 -D -s -2 ${NSA_DEV} &
+	run_cmd nettest -6 -D -s -3 ${NSA_DEV} &
 	sleep 1
 	run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a}
 	log_test_addr ${a} $? 0 "Global server, device client, local conn"
 
 	log_start
-	run_cmd nettest -6 -D -d ${VRF} -s -2 ${NSA_DEV} &
+	run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} &
 	sleep 1
 	run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a}
 	log_test_addr ${a} $? 0 "VRF server, device client, local conn"
 
 	log_start
-	run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} &
+	run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
 	sleep 1
 	run_cmd nettest -6 -D -d ${VRF} -r ${a}
 	log_test_addr ${a} $? 0 "Device server, VRF client, local conn"
 
 	log_start
-	run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} &
+	run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
 	sleep 1
 	run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a}
 	log_test_addr ${a} $? 0 "Device server, device client, local conn"
@@ -3280,11 +3638,19 @@ ipv6_addr_bind_novrf()
 		log_test_addr ${a} $? 0 "Raw socket bind to local address"
 
 		log_start
-		run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -d ${NSA_DEV} -b
+		run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -I ${NSA_DEV} -b
 		log_test_addr ${a} $? 0 "Raw socket bind to local address after device bind"
 	done
 
 	#
+	# raw socket with nonlocal bind
+	#
+	a=${NL_IP6}
+	log_start
+	run_cmd nettest -6 -s -R -P icmp -f -l ${a} -I ${NSA_DEV} -b
+	log_test_addr ${a} $? 0 "Raw socket bind to nonlocal address"
+
+	#
 	# tcp sockets
 	#
 	a=${NSA_IP6}
@@ -3293,14 +3659,17 @@ ipv6_addr_bind_novrf()
 	log_test_addr ${a} $? 0 "TCP socket bind to local address"
 
 	log_start
-	run_cmd nettest -6 -s -l ${a} -d ${NSA_DEV} -t1 -b
+	run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b
 	log_test_addr ${a} $? 0 "TCP socket bind to local address after device bind"
 
+	# Sadly, the kernel allows binding a socket to a device and then
+	# binding to an address not on the device. So this test passes
+	# when it really should not
 	a=${NSA_LO_IP6}
 	log_start
-	show_hint "Should fail with 'Cannot assign requested address'"
-	run_cmd nettest -6 -s -l ${a} -d ${NSA_DEV} -t1 -b
-	log_test_addr ${a} $? 1 "TCP socket bind to out of scope local address"
+	show_hint "Tecnically should fail since address is not on device but kernel allows"
+	run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b
+	log_test_addr ${a} $? 0 "TCP socket bind to out of scope local address"
 }
 
 ipv6_addr_bind_vrf()
@@ -3311,50 +3680,63 @@ ipv6_addr_bind_vrf()
 	for a in ${NSA_IP6} ${VRF_IP6}
 	do
 		log_start
-		run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -d ${VRF} -b
+		run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -I ${VRF} -b
 		log_test_addr ${a} $? 0 "Raw socket bind to local address after vrf bind"
 
 		log_start
-		run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -d ${NSA_DEV} -b
+		run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -I ${NSA_DEV} -b
 		log_test_addr ${a} $? 0 "Raw socket bind to local address after device bind"
 	done
 
 	a=${NSA_LO_IP6}
 	log_start
 	show_hint "Address on loopback is out of VRF scope"
-	run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -d ${VRF} -b
+	run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -I ${VRF} -b
 	log_test_addr ${a} $? 1 "Raw socket bind to invalid local address after vrf bind"
 
 	#
+	# raw socket with nonlocal bind
+	#
+	a=${NL_IP6}
+	log_start
+	run_cmd nettest -6 -s -R -P icmp -f -l ${a} -I ${VRF} -b
+	log_test_addr ${a} $? 0 "Raw socket bind to nonlocal address after VRF bind"
+
+	#
 	# tcp sockets
 	#
 	# address on enslaved device is valid for the VRF or device in a VRF
 	for a in ${NSA_IP6} ${VRF_IP6}
 	do
 		log_start
-		run_cmd nettest -6 -s -l ${a} -d ${VRF} -t1 -b
+		run_cmd nettest -6 -s -l ${a} -I ${VRF} -t1 -b
 		log_test_addr ${a} $? 0 "TCP socket bind to local address with VRF bind"
 	done
 
 	a=${NSA_IP6}
 	log_start
-	run_cmd nettest -6 -s -l ${a} -d ${NSA_DEV} -t1 -b
+	run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b
 	log_test_addr ${a} $? 0 "TCP socket bind to local address with device bind"
 
+	# Sadly, the kernel allows binding a socket to a device and then
+	# binding to an address not on the device. The only restriction
+	# is that the address is valid in the L3 domain. So this test
+	# passes when it really should not
 	a=${VRF_IP6}
 	log_start
-	run_cmd nettest -6 -s -l ${a} -d ${NSA_DEV} -t1 -b
-	log_test_addr ${a} $? 1 "TCP socket bind to VRF address with device bind"
+	show_hint "Tecnically should fail since address is not on device but kernel allows"
+	run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b
+	log_test_addr ${a} $? 0 "TCP socket bind to VRF address with device bind"
 
 	a=${NSA_LO_IP6}
 	log_start
 	show_hint "Address on loopback out of scope for VRF"
-	run_cmd nettest -6 -s -l ${a} -d ${VRF} -t1 -b
+	run_cmd nettest -6 -s -l ${a} -I ${VRF} -t1 -b
 	log_test_addr ${a} $? 1 "TCP socket bind to invalid local address for VRF"
 
 	log_start
 	show_hint "Address on loopback out of scope for device in VRF"
-	run_cmd nettest -6 -s -l ${a} -d ${NSA_DEV} -t1 -b
+	run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b
 	log_test_addr ${a} $? 1 "TCP socket bind to invalid local address for device bind"
 
 }
@@ -3402,7 +3784,7 @@ ipv6_rt()
 	for a in ${NSA_IP6} ${VRF_IP6}
 	do
 		log_start
-		run_cmd nettest ${varg} -d ${VRF} -s &
+		run_cmd nettest ${varg} -I ${VRF} -s &
 		sleep 1
 		run_cmd_nsb nettest ${varg} -r ${a} &
 		sleep 3
@@ -3416,7 +3798,7 @@ ipv6_rt()
 	for a in ${NSA_IP6} ${VRF_IP6}
 	do
 		log_start
-		run_cmd nettest ${varg} -d ${NSA_DEV} -s &
+		run_cmd nettest ${varg} -I ${NSA_DEV} -s &
 		sleep 1
 		run_cmd_nsb nettest ${varg} -r ${a} &
 		sleep 3
@@ -3473,7 +3855,7 @@ ipv6_rt()
 	for a in ${NSA_IP6} ${VRF_IP6}
 	do
 		log_start
-		run_cmd nettest ${varg} -d ${VRF} -s &
+		run_cmd nettest ${varg} -I ${VRF} -s &
 		sleep 1
 		run_cmd nettest ${varg} -d ${VRF} -r ${a} &
 		sleep 3
@@ -3497,7 +3879,7 @@ ipv6_rt()
 	setup ${with_vrf}
 
 	log_start
-	run_cmd nettest ${varg} -d ${VRF} -s &
+	run_cmd nettest ${varg} -I ${VRF} -s &
 	sleep 1
 	run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} &
 	sleep 3
@@ -3508,7 +3890,7 @@ ipv6_rt()
 	setup ${with_vrf}
 
 	log_start
-	run_cmd nettest ${varg} -d ${NSA_DEV} -s &
+	run_cmd nettest ${varg} -I ${NSA_DEV} -s &
 	sleep 1
 	run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} &
 	sleep 3
@@ -3787,10 +4169,81 @@ use_case_br()
 	setup_cmd_nsb ip li del vlan100 2>/dev/null
 }
 
+# VRF only.
+# ns-A device is connected to both ns-B and ns-C on a single VRF but only has
+# LLA on the interfaces
+use_case_ping_lla_multi()
+{
+	setup_lla_only
+	# only want reply from ns-A
+	setup_cmd_nsb sysctl -qw net.ipv6.icmp.echo_ignore_multicast=1
+	setup_cmd_nsc sysctl -qw net.ipv6.icmp.echo_ignore_multicast=1
+
+	log_start
+	run_cmd_nsb ping -c1 -w1 ${MCAST}%${NSB_DEV}
+	log_test_addr ${MCAST}%${NSB_DEV} $? 0 "Pre cycle, ping out ns-B"
+
+	run_cmd_nsc ping -c1 -w1 ${MCAST}%${NSC_DEV}
+	log_test_addr ${MCAST}%${NSC_DEV} $? 0 "Pre cycle, ping out ns-C"
+
+	# cycle/flap the first ns-A interface
+	setup_cmd ip link set ${NSA_DEV} down
+	setup_cmd ip link set ${NSA_DEV} up
+	sleep 1
+
+	log_start
+	run_cmd_nsb ping -c1 -w1 ${MCAST}%${NSB_DEV}
+	log_test_addr ${MCAST}%${NSB_DEV} $? 0 "Post cycle ${NSA} ${NSA_DEV}, ping out ns-B"
+	run_cmd_nsc ping -c1 -w1 ${MCAST}%${NSC_DEV}
+	log_test_addr ${MCAST}%${NSC_DEV} $? 0 "Post cycle ${NSA} ${NSA_DEV}, ping out ns-C"
+
+	# cycle/flap the second ns-A interface
+	setup_cmd ip link set ${NSA_DEV2} down
+	setup_cmd ip link set ${NSA_DEV2} up
+	sleep 1
+
+	log_start
+	run_cmd_nsb ping -c1 -w1 ${MCAST}%${NSB_DEV}
+	log_test_addr ${MCAST}%${NSB_DEV} $? 0 "Post cycle ${NSA} ${NSA_DEV2}, ping out ns-B"
+	run_cmd_nsc ping -c1 -w1 ${MCAST}%${NSC_DEV}
+	log_test_addr ${MCAST}%${NSC_DEV} $? 0 "Post cycle ${NSA} ${NSA_DEV2}, ping out ns-C"
+}
+
+# Perform IPv{4,6} SNAT on ns-A, and verify TCP connection is successfully
+# established with ns-B.
+use_case_snat_on_vrf()
+{
+	setup "yes"
+
+	local port="12345"
+
+	run_cmd iptables -t nat -A POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP} -o ${VRF}
+	run_cmd ip6tables -t nat -A POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP6} -o ${VRF}
+
+	run_cmd_nsb nettest -s -l ${NSB_IP} -p ${port} &
+	sleep 1
+	run_cmd nettest -d ${VRF} -r ${NSB_IP} -p ${port}
+	log_test $? 0 "IPv4 TCP connection over VRF with SNAT"
+
+	run_cmd_nsb nettest -6 -s -l ${NSB_IP6} -p ${port} &
+	sleep 1
+	run_cmd nettest -6 -d ${VRF} -r ${NSB_IP6} -p ${port}
+	log_test $? 0 "IPv6 TCP connection over VRF with SNAT"
+
+	# Cleanup
+	run_cmd iptables -t nat -D POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP} -o ${VRF}
+	run_cmd ip6tables -t nat -D POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP6} -o ${VRF}
+}
+
 use_cases()
 {
 	log_section "Use cases"
+	log_subsection "Device enslaved to bridge"
 	use_case_br
+	log_subsection "Ping LLA with multiple interfaces"
+	use_case_ping_lla_multi
+	log_subsection "SNAT on VRF"
+	use_case_snat_on_vrf
 }
 
 ################################################################################
@@ -3807,14 +4260,17 @@ usage: ${0##*/} OPTS
 	-p          Pause on fail
 	-P          Pause after each test
 	-v          Be verbose
+
+Tests:
+	$TESTS_IPV4 $TESTS_IPV6 $TESTS_OTHER
 EOF
 }
 
 ################################################################################
 # main
 
-TESTS_IPV4="ipv4_ping ipv4_tcp ipv4_udp ipv4_addr_bind ipv4_runtime ipv4_netfilter"
-TESTS_IPV6="ipv6_ping ipv6_tcp ipv6_udp ipv6_addr_bind ipv6_runtime ipv6_netfilter"
+TESTS_IPV4="ipv4_ping ipv4_tcp ipv4_udp ipv4_bind ipv4_runtime ipv4_netfilter"
+TESTS_IPV6="ipv6_ping ipv6_tcp ipv6_udp ipv6_bind ipv6_runtime ipv6_netfilter"
 TESTS_OTHER="use_cases"
 
 PAUSE_ON_FAIL=no
@@ -3848,10 +4304,13 @@ elif [ "$TESTS" = "ipv6" ]; then
 	TESTS="$TESTS_IPV6"
 fi
 
-which nettest >/dev/null
-if [ $? -ne 0 ]; then
-	echo "'nettest' command not found; skipping tests"
-	exit 0
+# nettest can be run from PATH or from same directory as this selftest
+if ! which nettest >/dev/null; then
+	PATH=$PWD:$PATH
+	if ! which nettest >/dev/null; then
+		echo "'nettest' command not found; skipping tests"
+		exit $ksft_skip
+	fi
 fi
 
 declare -i nfail=0
@@ -3879,8 +4338,6 @@ do
 	# setup namespaces and config, but do not run any tests
 	setup)		 setup; exit 0;;
 	vrf_setup)	 setup "yes"; exit 0;;
-
-	help)            echo "Test names: $TESTS"; exit 0;;
 	esac
 done
 
@@ -3888,3 +4345,11 @@ cleanup 2>/dev/null
 
 printf "\nTests passed: %3d\n" ${nsuccess}
 printf "Tests failed: %3d\n"   ${nfail}
+
+if [ $nfail -ne 0 ]; then
+	exit 1 # KSFT_FAIL
+elif [ $nsuccess -eq 0 ]; then
+	exit $ksft_skip
+fi
+
+exit 0 # KSFT_PASS
diff --git a/tools/testing/selftests/net/fdb_flush.sh b/tools/testing/selftests/net/fdb_flush.sh
new file mode 100755
index 000000000000..d5e3abb8658c
--- /dev/null
+++ b/tools/testing/selftests/net/fdb_flush.sh
@@ -0,0 +1,813 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test is for checking functionality of flushing FDB entries.
+# Check that flush works as expected with all the supported arguments and verify
+# some combinations of arguments.
+
+source lib.sh
+
+FLUSH_BY_STATE_TESTS="
+	vxlan_test_flush_by_permanent
+	vxlan_test_flush_by_nopermanent
+	vxlan_test_flush_by_static
+	vxlan_test_flush_by_nostatic
+	vxlan_test_flush_by_dynamic
+	vxlan_test_flush_by_nodynamic
+"
+
+FLUSH_BY_FLAG_TESTS="
+	vxlan_test_flush_by_extern_learn
+	vxlan_test_flush_by_noextern_learn
+	vxlan_test_flush_by_router
+	vxlan_test_flush_by_norouter
+"
+
+TESTS="
+	vxlan_test_flush_by_dev
+	vxlan_test_flush_by_vni
+	vxlan_test_flush_by_src_vni
+	vxlan_test_flush_by_port
+	vxlan_test_flush_by_dst_ip
+	vxlan_test_flush_by_nhid
+	$FLUSH_BY_STATE_TESTS
+	$FLUSH_BY_FLAG_TESTS
+	vxlan_test_flush_by_several_args
+	vxlan_test_flush_by_remote_attributes
+	bridge_test_flush_by_dev
+	bridge_test_flush_by_vlan
+	bridge_vxlan_test_flush
+"
+
+: ${VERBOSE:=0}
+: ${PAUSE_ON_FAIL:=no}
+: ${PAUSE:=no}
+: ${VXPORT:=4789}
+
+run_cmd()
+{
+	local cmd="$1"
+	local out
+	local rc
+	local stderr="2>/dev/null"
+
+	if [ "$VERBOSE" = "1" ]; then
+		printf "COMMAND: $cmd\n"
+		stderr=
+	fi
+
+	out=$(eval $cmd $stderr)
+	rc=$?
+	if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+		echo "    $out"
+	fi
+
+	return $rc
+}
+
+log_test()
+{
+	local rc=$1
+	local expected=$2
+	local msg="$3"
+	local nsuccess
+	local nfail
+	local ret
+
+	if [ ${rc} -eq ${expected} ]; then
+		printf "TEST: %-60s  [ OK ]\n" "${msg}"
+		nsuccess=$((nsuccess+1))
+	else
+		ret=1
+		nfail=$((nfail+1))
+		printf "TEST: %-60s  [FAIL]\n" "${msg}"
+		if [ "$VERBOSE" = "1" ]; then
+			echo "    rc=$rc, expected $expected"
+		fi
+
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+		echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	fi
+
+	if [ "${PAUSE}" = "yes" ]; then
+		echo
+		echo "hit enter to continue, 'q' to quit"
+		read a
+		[ "$a" = "q" ] && exit 1
+	fi
+
+	[ "$VERBOSE" = "1" ] && echo
+}
+
+MAC_POOL_1="
+	de:ad:be:ef:13:10
+	de:ad:be:ef:13:11
+	de:ad:be:ef:13:12
+	de:ad:be:ef:13:13
+	de:ad:be:ef:13:14
+"
+mac_pool_1_len=$(echo "$MAC_POOL_1" | grep -c .)
+
+MAC_POOL_2="
+	ca:fe:be:ef:13:10
+	ca:fe:be:ef:13:11
+	ca:fe:be:ef:13:12
+	ca:fe:be:ef:13:13
+	ca:fe:be:ef:13:14
+"
+mac_pool_2_len=$(echo "$MAC_POOL_2" | grep -c .)
+
+fdb_add_mac_pool_1()
+{
+	local dev=$1; shift
+	local args="$@"
+
+	for mac in $MAC_POOL_1
+	do
+		$BRIDGE fdb add $mac dev $dev $args
+	done
+}
+
+fdb_add_mac_pool_2()
+{
+	local dev=$1; shift
+	local args="$@"
+
+	for mac in $MAC_POOL_2
+	do
+		$BRIDGE fdb add $mac dev $dev $args
+	done
+}
+
+fdb_check_n_entries_by_dev_filter()
+{
+	local dev=$1; shift
+	local exp_entries=$1; shift
+	local filter="$@"
+
+	local entries=$($BRIDGE fdb show dev $dev | grep "$filter" | wc -l)
+
+	[[ $entries -eq $exp_entries ]]
+	rc=$?
+
+	log_test $rc 0 "$dev: Expected $exp_entries FDB entries, got $entries"
+	return $rc
+}
+
+vxlan_test_flush_by_dev()
+{
+	local vni=3000
+	local dst_ip=192.0.2.1
+
+	fdb_add_mac_pool_1 vx10 vni $vni dst $dst_ip
+	fdb_add_mac_pool_2 vx20 vni $vni dst $dst_ip
+
+	fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len
+	fdb_check_n_entries_by_dev_filter vx20 $mac_pool_2_len
+
+	run_cmd "$BRIDGE fdb flush dev vx10"
+	log_test $? 0 "Flush FDB by dev vx10"
+
+	fdb_check_n_entries_by_dev_filter vx10 0
+	log_test $? 0 "Flush FDB by dev vx10 - test vx10 entries"
+
+	fdb_check_n_entries_by_dev_filter vx20 $mac_pool_2_len
+	log_test $? 0 "Flush FDB by dev vx10 - test vx20 entries"
+}
+
+vxlan_test_flush_by_vni()
+{
+	local vni_1=3000
+	local vni_2=4000
+	local dst_ip=192.0.2.1
+
+	fdb_add_mac_pool_1 vx10 vni $vni_1 dst $dst_ip
+	fdb_add_mac_pool_2 vx10 vni $vni_2 dst $dst_ip
+
+	fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len vni $vni_1
+	fdb_check_n_entries_by_dev_filter vx10 $mac_pool_2_len vni $vni_2
+
+	run_cmd "$BRIDGE fdb flush dev vx10 vni $vni_2"
+	log_test $? 0 "Flush FDB by dev vx10 and vni $vni_2"
+
+	fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len vni $vni_1
+	log_test $? 0 "Test entries with vni $vni_1"
+
+	fdb_check_n_entries_by_dev_filter vx10 0 vni $vni_2
+	log_test $? 0 "Test entries with vni $vni_2"
+}
+
+vxlan_test_flush_by_src_vni()
+{
+	# Set some entries with {vni=x,src_vni=y} and some with the opposite -
+	# {vni=y,src_vni=x}, to verify that when we flush by src_vni=x, entries
+	# with vni=x are not flused.
+	local vni_1=3000
+	local vni_2=4000
+	local src_vni_1=4000
+	local src_vni_2=3000
+	local dst_ip=192.0.2.1
+
+	# Reconfigure vx10 with 'external' to get 'src_vni' details in
+	# 'bridge fdb' output
+	$IP link del dev vx10
+	$IP link add name vx10 type vxlan dstport "$VXPORT" external
+
+	fdb_add_mac_pool_1 vx10 vni $vni_1 src_vni $src_vni_1 dst $dst_ip
+	fdb_add_mac_pool_2 vx10 vni $vni_2 src_vni $src_vni_2 dst $dst_ip
+
+	fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len \
+		src_vni $src_vni_1
+	fdb_check_n_entries_by_dev_filter vx10 $mac_pool_2_len \
+		src_vni $src_vni_2
+
+	run_cmd "$BRIDGE fdb flush dev vx10 src_vni $src_vni_2"
+	log_test $? 0 "Flush FDB by dev vx10 and src_vni $src_vni_2"
+
+	fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len \
+		src_vni $src_vni_1
+	log_test $? 0 "Test entries with src_vni $src_vni_1"
+
+	fdb_check_n_entries_by_dev_filter vx10 0 src_vni $src_vni_2
+	log_test $? 0 "Test entries with src_vni $src_vni_2"
+}
+
+vxlan_test_flush_by_port()
+{
+	local port_1=1234
+	local port_2=4321
+	local dst_ip=192.0.2.1
+
+	fdb_add_mac_pool_1 vx10 port $port_1 dst $dst_ip
+	fdb_add_mac_pool_2 vx10 port $port_2 dst $dst_ip
+
+	fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len port $port_1
+	fdb_check_n_entries_by_dev_filter vx10 $mac_pool_2_len port $port_2
+
+	run_cmd "$BRIDGE fdb flush dev vx10 port $port_2"
+	log_test $? 0 "Flush FDB by dev vx10 and port $port_2"
+
+	fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len port $port_1
+	log_test $? 0 "Test entries with port $port_1"
+
+	fdb_check_n_entries_by_dev_filter vx10 0 port $port_2
+	log_test $? 0 "Test entries with port $port_2"
+}
+
+vxlan_test_flush_by_dst_ip()
+{
+	local dst_ip_1=192.0.2.1
+	local dst_ip_2=192.0.2.2
+
+	fdb_add_mac_pool_1 vx10 dst $dst_ip_1
+	fdb_add_mac_pool_2 vx10 dst $dst_ip_2
+
+	fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len dst $dst_ip_1
+	fdb_check_n_entries_by_dev_filter vx10 $mac_pool_2_len dst $dst_ip_2
+
+	run_cmd "$BRIDGE fdb flush dev vx10 dst $dst_ip_2"
+	log_test $? 0 "Flush FDB by dev vx10 and dst $dst_ip_2"
+
+	fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len dst $dst_ip_1
+	log_test $? 0 "Test entries with dst $dst_ip_1"
+
+	fdb_check_n_entries_by_dev_filter vx10 0 dst $dst_ip_2
+	log_test $? 0 "Test entries with dst $dst_ip_2"
+}
+
+nexthops_add()
+{
+	local nhid_1=$1; shift
+	local nhid_2=$1; shift
+
+	$IP nexthop add id 10 via 192.0.2.1 fdb
+	$IP nexthop add id $nhid_1 group 10 fdb
+
+	$IP nexthop add id 20 via 192.0.2.2 fdb
+	$IP nexthop add id $nhid_2 group 20 fdb
+}
+
+vxlan_test_flush_by_nhid()
+{
+	local nhid_1=100
+	local nhid_2=200
+
+	nexthops_add $nhid_1 $nhid_2
+
+	fdb_add_mac_pool_1 vx10 nhid $nhid_1
+	fdb_add_mac_pool_2 vx10 nhid $nhid_2
+
+	fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len nhid $nhid_1
+	fdb_check_n_entries_by_dev_filter vx10 $mac_pool_2_len nhid $nhid_2
+
+	run_cmd "$BRIDGE fdb flush dev vx10 nhid $nhid_2"
+	log_test $? 0 "Flush FDB by dev vx10 and nhid $nhid_2"
+
+	fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len nhid $nhid_1
+	log_test $? 0 "Test entries with nhid $nhid_1"
+
+	fdb_check_n_entries_by_dev_filter vx10 0 nhid $nhid_2
+	log_test $? 0 "Test entries with nhid $nhid_2"
+
+	# Flush also entries with $nhid_1, and then verify that flushing by
+	# 'nhid' does not return an error when there are no entries with
+	# nexthops.
+	run_cmd "$BRIDGE fdb flush dev vx10 nhid $nhid_1"
+	log_test $? 0 "Flush FDB by dev vx10 and nhid $nhid_1"
+
+	fdb_check_n_entries_by_dev_filter vx10 0 nhid
+	log_test $? 0 "Test entries with 'nhid' keyword"
+
+	run_cmd "$BRIDGE fdb flush dev vx10 nhid $nhid_1"
+	log_test $? 0 "Flush FDB by nhid when there are no entries with nexthop"
+}
+
+vxlan_test_flush_by_state()
+{
+	local flush_by_state=$1; shift
+	local state_1=$1; shift
+	local exp_state_1=$1; shift
+	local state_2=$1; shift
+	local exp_state_2=$1; shift
+
+	local dst_ip_1=192.0.2.1
+	local dst_ip_2=192.0.2.2
+
+	fdb_add_mac_pool_1 vx10 dst $dst_ip_1 $state_1
+	fdb_add_mac_pool_2 vx10 dst $dst_ip_2 $state_2
+
+	# Check the entries by dst_ip as not all states appear in 'bridge fdb'
+	# output.
+	fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len dst $dst_ip_1
+	fdb_check_n_entries_by_dev_filter vx10 $mac_pool_2_len dst $dst_ip_2
+
+	run_cmd "$BRIDGE fdb flush dev vx10 $flush_by_state"
+	log_test $? 0 "Flush FDB by dev vx10 and state $flush_by_state"
+
+	fdb_check_n_entries_by_dev_filter vx10 $exp_state_1 dst $dst_ip_1
+	log_test $? 0 "Test entries with state $state_1"
+
+	fdb_check_n_entries_by_dev_filter vx10 $exp_state_2 dst $dst_ip_2
+	log_test $? 0 "Test entries with state $state_2"
+}
+
+vxlan_test_flush_by_permanent()
+{
+	# Entries that are added without state get 'permanent' state by
+	# default, add some entries with flag 'extern_learn' instead of state,
+	# so they will be added with 'permanent' and should be flushed also.
+	local flush_by_state="permanent"
+	local state_1="permanent"
+	local exp_state_1=0
+	local state_2="extern_learn"
+	local exp_state_2=0
+
+	vxlan_test_flush_by_state $flush_by_state $state_1 $exp_state_1 \
+		$state_2 $exp_state_2
+}
+
+vxlan_test_flush_by_nopermanent()
+{
+	local flush_by_state="nopermanent"
+	local state_1="permanent"
+	local exp_state_1=$mac_pool_1_len
+	local state_2="static"
+	local exp_state_2=0
+
+	vxlan_test_flush_by_state $flush_by_state $state_1 $exp_state_1 \
+		$state_2 $exp_state_2
+}
+
+vxlan_test_flush_by_static()
+{
+	local flush_by_state="static"
+	local state_1="static"
+	local exp_state_1=0
+	local state_2="dynamic"
+	local exp_state_2=$mac_pool_2_len
+
+	vxlan_test_flush_by_state $flush_by_state $state_1 $exp_state_1 \
+		$state_2 $exp_state_2
+}
+
+vxlan_test_flush_by_nostatic()
+{
+	local flush_by_state="nostatic"
+	local state_1="permanent"
+	local exp_state_1=$mac_pool_1_len
+	local state_2="dynamic"
+	local exp_state_2=0
+
+	vxlan_test_flush_by_state $flush_by_state $state_1 $exp_state_1 \
+		$state_2 $exp_state_2
+}
+
+vxlan_test_flush_by_dynamic()
+{
+	local flush_by_state="dynamic"
+	local state_1="dynamic"
+	local exp_state_1=0
+	local state_2="static"
+	local exp_state_2=$mac_pool_2_len
+
+	vxlan_test_flush_by_state $flush_by_state $state_1 $exp_state_1 \
+		$state_2 $exp_state_2
+}
+
+vxlan_test_flush_by_nodynamic()
+{
+	local flush_by_state="nodynamic"
+	local state_1="permanent"
+	local exp_state_1=0
+	local state_2="dynamic"
+	local exp_state_2=$mac_pool_2_len
+
+	vxlan_test_flush_by_state $flush_by_state $state_1 $exp_state_1 \
+		$state_2 $exp_state_2
+}
+
+vxlan_test_flush_by_flag()
+{
+	local flush_by_flag=$1; shift
+	local flag_1=$1; shift
+	local exp_flag_1=$1; shift
+	local flag_2=$1; shift
+	local exp_flag_2=$1; shift
+
+	local dst_ip_1=192.0.2.1
+	local dst_ip_2=192.0.2.2
+
+	fdb_add_mac_pool_1 vx10 dst $dst_ip_1 $flag_1
+	fdb_add_mac_pool_2 vx10 dst $dst_ip_2 $flag_2
+
+	fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len $flag_1
+	fdb_check_n_entries_by_dev_filter vx10 $mac_pool_2_len $flag_2
+
+	run_cmd "$BRIDGE fdb flush dev vx10 $flush_by_flag"
+	log_test $? 0 "Flush FDB by dev vx10 and flag $flush_by_flag"
+
+	fdb_check_n_entries_by_dev_filter vx10 $exp_flag_1 dst $dst_ip_1
+	log_test $? 0 "Test entries with flag $flag_1"
+
+	fdb_check_n_entries_by_dev_filter vx10 $exp_flag_2 dst $dst_ip_2
+	log_test $? 0 "Test entries with flag $flag_2"
+}
+
+vxlan_test_flush_by_extern_learn()
+{
+	local flush_by_flag="extern_learn"
+	local flag_1="extern_learn"
+	local exp_flag_1=0
+	local flag_2="router"
+	local exp_flag_2=$mac_pool_2_len
+
+	vxlan_test_flush_by_flag $flush_by_flag $flag_1 $exp_flag_1 \
+		$flag_2 $exp_flag_2
+}
+
+vxlan_test_flush_by_noextern_learn()
+{
+	local flush_by_flag="noextern_learn"
+	local flag_1="extern_learn"
+	local exp_flag_1=$mac_pool_1_len
+	local flag_2="router"
+	local exp_flag_2=0
+
+	vxlan_test_flush_by_flag $flush_by_flag $flag_1 $exp_flag_1 \
+		$flag_2 $exp_flag_2
+}
+
+vxlan_test_flush_by_router()
+{
+	local flush_by_flag="router"
+	local flag_1="router"
+	local exp_flag_1=0
+	local flag_2="extern_learn"
+	local exp_flag_2=$mac_pool_2_len
+
+	vxlan_test_flush_by_flag $flush_by_flag $flag_1 $exp_flag_1 \
+		$flag_2 $exp_flag_2
+}
+
+vxlan_test_flush_by_norouter()
+{
+
+	local flush_by_flag="norouter"
+	local flag_1="router"
+	local exp_flag_1=$mac_pool_1_len
+	local flag_2="extern_learn"
+	local exp_flag_2=0
+
+	vxlan_test_flush_by_flag $flush_by_flag $flag_1 $exp_flag_1 \
+		$flag_2 $exp_flag_2
+}
+
+vxlan_test_flush_by_several_args()
+{
+	local dst_ip_1=192.0.2.1
+	local dst_ip_2=192.0.2.2
+	local state_1=permanent
+	local state_2=static
+	local vni=3000
+	local port=1234
+	local nhid=100
+	local flag=router
+	local flush_args
+
+	################### Flush by 2 args - nhid and flag ####################
+	$IP nexthop add id 10 via 192.0.2.1 fdb
+	$IP nexthop add id $nhid group 10 fdb
+
+	fdb_add_mac_pool_1 vx10 nhid $nhid $flag $state_1
+	fdb_add_mac_pool_2 vx10 nhid $nhid $flag $state_2
+
+	fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len $state_1
+	fdb_check_n_entries_by_dev_filter vx10 $mac_pool_2_len $state_2
+
+	run_cmd "$BRIDGE fdb flush dev vx10 nhid $nhid $flag"
+	log_test $? 0 "Flush FDB by dev vx10 nhid $nhid $flag"
+
+	# All entries should be flushed as 'state' is not an argument for flush
+	# filtering.
+	fdb_check_n_entries_by_dev_filter vx10 0 $state_1
+	log_test $? 0 "Test entries with state $state_1"
+
+	fdb_check_n_entries_by_dev_filter vx10 0 $state_2
+	log_test $? 0 "Test entries with state $state_2"
+
+	################ Flush by 3 args - VNI, port and dst_ip ################
+	fdb_add_mac_pool_1 vx10 vni $vni port $port dst $dst_ip_1
+	fdb_add_mac_pool_2 vx10 vni $vni port $port dst $dst_ip_2
+
+	fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len dst $dst_ip_1
+	fdb_check_n_entries_by_dev_filter vx10 $mac_pool_2_len dst $dst_ip_2
+
+	flush_args="vni $vni port $port dst $dst_ip_2"
+	run_cmd "$BRIDGE fdb flush dev vx10 $flush_args"
+	log_test $? 0 "Flush FDB by dev vx10 $flush_args"
+
+	# Only entries with $dst_ip_2 should be flushed, even the rest arguments
+	# match the filter, the flush should be AND of all the arguments.
+	fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len dst $dst_ip_1
+	log_test $? 0 "Test entries with dst $dst_ip_1"
+
+	fdb_check_n_entries_by_dev_filter vx10 0 dst $dst_ip_2
+	log_test $? 0 "Test entries with dst $dst_ip_2"
+}
+
+multicast_fdb_entries_add()
+{
+	mac=00:00:00:00:00:00
+	vnis=(2000 3000)
+
+	for vni in "${vnis[@]}"; do
+		$BRIDGE fdb append $mac dev vx10 dst 192.0.2.1 vni $vni \
+			src_vni 5000
+		$BRIDGE fdb append $mac dev vx10 dst 192.0.2.1 vni $vni \
+			port 1111
+		$BRIDGE fdb append $mac dev vx10 dst 192.0.2.2 vni $vni \
+			port 2222
+	done
+}
+
+vxlan_test_flush_by_remote_attributes()
+{
+	local flush_args
+
+	# Reconfigure vx10 with 'external' to get 'src_vni' details in
+	# 'bridge fdb' output
+	$IP link del dev vx10
+	$IP link add name vx10 type vxlan dstport "$VXPORT" external
+
+	# For multicat FDB entries, the VXLAN driver stores a linked list of
+	# remotes for a given key. Verify that only the expected remotes are
+	# flushed.
+	multicast_fdb_entries_add
+
+	## Flush by 3 remote's attributes - destination IP, port and VNI ##
+	flush_args="dst 192.0.2.1 port 1111 vni 2000"
+	fdb_check_n_entries_by_dev_filter vx10 1 $flush_args
+
+	t0_n_entries=$($BRIDGE fdb show dev vx10 | wc -l)
+	run_cmd "$BRIDGE fdb flush dev vx10 $flush_args"
+	log_test $? 0 "Flush FDB by dev vx10 $flush_args"
+
+	fdb_check_n_entries_by_dev_filter vx10 0 $flush_args
+
+	exp_n_entries=$((t0_n_entries - 1))
+	t1_n_entries=$($BRIDGE fdb show dev vx10 | wc -l)
+	[[ $t1_n_entries -eq $exp_n_entries ]]
+	log_test $? 0 "Check how many entries were flushed"
+
+	## Flush by 2 remote's attributes - destination IP and port ##
+	flush_args="dst 192.0.2.2 port 2222"
+
+	fdb_check_n_entries_by_dev_filter vx10 2 $flush_args
+
+	t0_n_entries=$($BRIDGE fdb show dev vx10 | wc -l)
+	run_cmd "$BRIDGE fdb flush dev vx10 $flush_args"
+	log_test $? 0 "Flush FDB by dev vx10 $flush_args"
+
+	fdb_check_n_entries_by_dev_filter vx10 0 $flush_args
+
+	exp_n_entries=$((t0_n_entries - 2))
+	t1_n_entries=$($BRIDGE fdb show dev vx10 | wc -l)
+	[[ $t1_n_entries -eq $exp_n_entries ]]
+	log_test $? 0 "Check how many entries were flushed"
+
+	## Flush by source VNI, which is not remote's attribute and VNI ##
+	flush_args="vni 3000 src_vni 5000"
+
+	fdb_check_n_entries_by_dev_filter vx10 1 $flush_args
+
+	t0_n_entries=$($BRIDGE fdb show dev vx10 | wc -l)
+	run_cmd "$BRIDGE fdb flush dev vx10 $flush_args"
+	log_test $? 0 "Flush FDB by dev vx10 $flush_args"
+
+	fdb_check_n_entries_by_dev_filter vx10 0 $flush_args
+
+	exp_n_entries=$((t0_n_entries -1))
+	t1_n_entries=$($BRIDGE fdb show dev vx10 | wc -l)
+	[[ $t1_n_entries -eq $exp_n_entries ]]
+	log_test $? 0 "Check how many entries were flushed"
+
+	# Flush by 1 remote's attribute - destination IP ##
+	flush_args="dst 192.0.2.1"
+
+	fdb_check_n_entries_by_dev_filter vx10 2 $flush_args
+
+	t0_n_entries=$($BRIDGE fdb show dev vx10 | wc -l)
+	run_cmd "$BRIDGE fdb flush dev vx10 $flush_args"
+	log_test $? 0 "Flush FDB by dev vx10 $flush_args"
+
+	fdb_check_n_entries_by_dev_filter vx10 0 $flush_args
+
+	exp_n_entries=$((t0_n_entries -2))
+	t1_n_entries=$($BRIDGE fdb show dev vx10 | wc -l)
+	[[ $t1_n_entries -eq $exp_n_entries ]]
+	log_test $? 0 "Check how many entries were flushed"
+}
+
+bridge_test_flush_by_dev()
+{
+	local dst_ip=192.0.2.1
+	local br0_n_ent_t0=$($BRIDGE fdb show dev br0 | wc -l)
+	local br1_n_ent_t0=$($BRIDGE fdb show dev br1 | wc -l)
+
+	fdb_add_mac_pool_1 br0 dst $dst_ip
+	fdb_add_mac_pool_2 br1 dst $dst_ip
+
+	# Each 'fdb add' command adds one extra entry in the bridge with the
+	# default vlan.
+	local exp_br0_n_ent=$(($br0_n_ent_t0 + 2 * $mac_pool_1_len))
+	local exp_br1_n_ent=$(($br1_n_ent_t0 + 2 * $mac_pool_2_len))
+
+	fdb_check_n_entries_by_dev_filter br0 $exp_br0_n_ent
+	fdb_check_n_entries_by_dev_filter br1 $exp_br1_n_ent
+
+	run_cmd "$BRIDGE fdb flush dev br0"
+	log_test $? 0 "Flush FDB by dev br0"
+
+	# The default entry should not be flushed
+	fdb_check_n_entries_by_dev_filter br0 1
+	log_test $? 0 "Flush FDB by dev br0 - test br0 entries"
+
+	fdb_check_n_entries_by_dev_filter br1 $exp_br1_n_ent
+	log_test $? 0 "Flush FDB by dev br0 - test br1 entries"
+}
+
+bridge_test_flush_by_vlan()
+{
+	local vlan_1=10
+	local vlan_2=20
+	local vlan_1_ent_t0
+	local vlan_2_ent_t0
+
+	$BRIDGE vlan add vid $vlan_1 dev br0 self
+	$BRIDGE vlan add vid $vlan_2 dev br0 self
+
+	vlan_1_ent_t0=$($BRIDGE fdb show dev br0 | grep "vlan $vlan_1" | wc -l)
+	vlan_2_ent_t0=$($BRIDGE fdb show dev br0 | grep "vlan $vlan_2" | wc -l)
+
+	fdb_add_mac_pool_1 br0 vlan $vlan_1
+	fdb_add_mac_pool_2 br0 vlan $vlan_2
+
+	local exp_vlan_1_ent=$(($vlan_1_ent_t0 + $mac_pool_1_len))
+	local exp_vlan_2_ent=$(($vlan_2_ent_t0 + $mac_pool_2_len))
+
+	fdb_check_n_entries_by_dev_filter br0 $exp_vlan_1_ent vlan $vlan_1
+	fdb_check_n_entries_by_dev_filter br0 $exp_vlan_2_ent vlan $vlan_2
+
+	run_cmd "$BRIDGE fdb flush dev br0 vlan $vlan_1"
+	log_test $? 0 "Flush FDB by dev br0 and vlan $vlan_1"
+
+	fdb_check_n_entries_by_dev_filter br0 0 vlan $vlan_1
+	log_test $? 0 "Test entries with vlan $vlan_1"
+
+	fdb_check_n_entries_by_dev_filter br0 $exp_vlan_2_ent vlan $vlan_2
+	log_test $? 0 "Test entries with vlan $vlan_2"
+}
+
+bridge_vxlan_test_flush()
+{
+	local vlan_1=10
+	local dst_ip=192.0.2.1
+
+	$IP link set dev vx10 master br0
+	$BRIDGE vlan add vid $vlan_1 dev br0 self
+	$BRIDGE vlan add vid $vlan_1 dev vx10
+
+	fdb_add_mac_pool_1 vx10 vni 3000 dst $dst_ip self master
+
+	fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len vlan $vlan_1
+	fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len vni 3000
+
+	# Such command should fail in VXLAN driver as vlan is not supported,
+	# but the command should flush the entries in the bridge
+	run_cmd "$BRIDGE fdb flush dev vx10 vlan $vlan_1 master self"
+	log_test $? 255 \
+		"Flush FDB by dev vx10, vlan $vlan_1, master and self"
+
+	fdb_check_n_entries_by_dev_filter vx10 0 vlan $vlan_1
+	log_test $? 0 "Test entries with vlan $vlan_1"
+
+	fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len dst $dst_ip
+	log_test $? 0 "Test entries with dst $dst_ip"
+}
+
+setup()
+{
+	setup_ns NS
+	IP="ip -netns ${NS}"
+	BRIDGE="bridge -netns ${NS}"
+
+	$IP link add name vx10 type vxlan id 1000 dstport "$VXPORT"
+	$IP link add name vx20 type vxlan id 2000 dstport "$VXPORT"
+
+	$IP link add br0 type bridge vlan_filtering 1
+	$IP link add br1 type bridge vlan_filtering 1
+}
+
+cleanup()
+{
+	$IP link del dev br1
+	$IP link del dev br0
+
+	$IP link del dev vx20
+	$IP link del dev vx10
+
+	cleanup_ns ${NS}
+}
+
+################################################################################
+# main
+
+while getopts :t:pPhvw: o
+do
+	case $o in
+		t) TESTS=$OPTARG;;
+		p) PAUSE_ON_FAIL=yes;;
+		P) PAUSE=yes;;
+		v) VERBOSE=$(($VERBOSE + 1));;
+		w) PING_TIMEOUT=$OPTARG;;
+		h) usage; exit 0;;
+		*) usage; exit 1;;
+	esac
+done
+
+# make sure we don't pause twice
+[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no
+
+if [ "$(id -u)" -ne 0 ];then
+	echo "SKIP: Need root privileges"
+	exit $ksft_skip;
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+# Check a flag that is added to flush command as part of VXLAN flush support
+bridge fdb help 2>&1 | grep -q "\[no\]router"
+if [ $? -ne 0 ]; then
+	echo "SKIP: iproute2 too old, missing flush command for VXLAN"
+	exit $ksft_skip
+fi
+
+ip link add dev vx10 type vxlan id 1000 2> /dev/null
+out=$(bridge fdb flush dev vx10 2>&1 | grep -q "Operation not supported")
+if [ $? -eq 0 ]; then
+	echo "SKIP: kernel lacks vxlan flush support"
+	exit $ksft_skip
+fi
+ip link del dev vx10
+
+for t in $TESTS
+do
+	setup; $t; cleanup;
+done
diff --git a/tools/testing/selftests/net/fib-onlink-tests.sh b/tools/testing/selftests/net/fib-onlink-tests.sh
index c287b90b8af8..ec2d6ceb1f08 100755
--- a/tools/testing/selftests/net/fib-onlink-tests.sh
+++ b/tools/testing/selftests/net/fib-onlink-tests.sh
@@ -3,6 +3,7 @@
 
 # IPv4 and IPv6 onlink tests
 
+source lib.sh
 PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
 VERBOSE=0
 
@@ -74,9 +75,6 @@ TEST_NET4IN6[2]=10.2.1.254
 # mcast address
 MCAST6=ff02::1
 
-
-PEER_NS=bart
-PEER_CMD="ip netns exec ${PEER_NS}"
 VRF=lisa
 VRF_TABLE=1101
 PBR_TABLE=101
@@ -176,8 +174,7 @@ setup()
 	set -e
 
 	# create namespace
-	ip netns add ${PEER_NS}
-	ip -netns ${PEER_NS} li set lo up
+	setup_ns PEER_NS
 
 	# add vrf table
 	ip li add ${VRF} type vrf table ${VRF_TABLE}
@@ -219,7 +216,7 @@ setup()
 cleanup()
 {
 	# make sure we start from a clean slate
-	ip netns del ${PEER_NS} 2>/dev/null
+	cleanup_ns ${PEER_NS} 2>/dev/null
 	for n in 1 3 5 7; do
 		ip link del ${NETIFS[p${n}]} 2>/dev/null
 	done
diff --git a/tools/testing/selftests/net/fib_nexthop_multiprefix.sh b/tools/testing/selftests/net/fib_nexthop_multiprefix.sh
index 51df5e305855..e85248609af4 100755
--- a/tools/testing/selftests/net/fib_nexthop_multiprefix.sh
+++ b/tools/testing/selftests/net/fib_nexthop_multiprefix.sh
@@ -12,6 +12,7 @@
 #
 # routing in h0 to hN is done with nexthop objects.
 
+source lib.sh
 PAUSE_ON_FAIL=no
 VERBOSE=0
 
@@ -72,12 +73,6 @@ create_ns()
 {
 	local ns=${1}
 
-	ip netns del ${ns} 2>/dev/null
-
-	ip netns add ${ns}
-	ip -netns ${ns} addr add 127.0.0.1/8 dev lo
-	ip -netns ${ns} link set lo up
-
 	ip netns exec ${ns} sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1
 	case ${ns} in
 	h*)
@@ -97,7 +92,13 @@ setup()
 
 	#set -e
 
-	for ns in h0 r1 h1 h2 h3
+	setup_ns h0 r1 h1 h2 h3
+	h[0]=$h0
+	h[1]=$h1
+	h[2]=$h2
+	h[3]=$h3
+	r[1]=$r1
+	for ns in ${h[0]} ${r[1]} ${h[1]} ${h[2]} ${h[3]}
 	do
 		create_ns ${ns}
 	done
@@ -108,35 +109,35 @@ setup()
 
 	for i in 0 1 2 3
 	do
-		ip -netns h${i} li add eth0 type veth peer name r1h${i}
-		ip -netns h${i} li set eth0 up
-		ip -netns h${i} li set r1h${i} netns r1 name eth${i} up
-
-		ip -netns h${i}    addr add dev eth0 172.16.10${i}.1/24
-		ip -netns h${i} -6 addr add dev eth0 2001:db8:10${i}::1/64
-		ip -netns r1    addr add dev eth${i} 172.16.10${i}.254/24
-		ip -netns r1 -6 addr add dev eth${i} 2001:db8:10${i}::64/64
+		ip -netns ${h[$i]} li add eth0 type veth peer name r1h${i}
+		ip -netns ${h[$i]} li set eth0 up
+		ip -netns ${h[$i]} li set r1h${i} netns ${r[1]} name eth${i} up
+
+		ip -netns ${h[$i]}    addr add dev eth0 172.16.10${i}.1/24
+		ip -netns ${h[$i]} -6 addr add dev eth0 2001:db8:10${i}::1/64
+		ip -netns ${r[1]}    addr add dev eth${i} 172.16.10${i}.254/24
+		ip -netns ${r[1]} -6 addr add dev eth${i} 2001:db8:10${i}::64/64
 	done
 
-	ip -netns h0 nexthop add id 4 via 172.16.100.254 dev eth0
-	ip -netns h0 nexthop add id 6 via 2001:db8:100::64 dev eth0
+	ip -netns ${h[0]} nexthop add id 4 via 172.16.100.254 dev eth0
+	ip -netns ${h[0]} nexthop add id 6 via 2001:db8:100::64 dev eth0
 
-	# routing from h0 to h1-h3 and back
+	# routing from ${h[0]} to h1-h3 and back
 	for i in 1 2 3
 	do
-		ip -netns h0    ro add 172.16.10${i}.0/24 nhid 4
-		ip -netns h${i} ro add 172.16.100.0/24 via 172.16.10${i}.254
+		ip -netns ${h[0]}    ro add 172.16.10${i}.0/24 nhid 4
+		ip -netns ${h[$i]} ro add 172.16.100.0/24 via 172.16.10${i}.254
 
-		ip -netns h0    -6 ro add 2001:db8:10${i}::/64 nhid 6
-		ip -netns h${i} -6 ro add 2001:db8:100::/64 via 2001:db8:10${i}::64
+		ip -netns ${h[0]}    -6 ro add 2001:db8:10${i}::/64 nhid 6
+		ip -netns ${h[$i]} -6 ro add 2001:db8:100::/64 via 2001:db8:10${i}::64
 	done
 
 	if [ "$VERBOSE" = "1" ]; then
 		echo
 		echo "host 1 config"
-		ip -netns h0 li sh
-		ip -netns h0 ro sh
-		ip -netns h0 -6 ro sh
+		ip -netns ${h[0]} li sh
+		ip -netns ${h[0]} ro sh
+		ip -netns ${h[0]} -6 ro sh
 	fi
 
 	#set +e
@@ -144,10 +145,7 @@ setup()
 
 cleanup()
 {
-	for n in h0 r1 h1 h2 h3
-	do
-		ip netns del ${n} 2>/dev/null
-	done
+	cleanup_all_ns
 }
 
 change_mtu()
@@ -156,7 +154,7 @@ change_mtu()
 	local mtu=$2
 
 	run_cmd ip -netns h${hostid} li set eth0 mtu ${mtu}
-	run_cmd ip -netns r1 li set eth${hostid} mtu ${mtu}
+	run_cmd ip -netns ${r1} li set eth${hostid} mtu ${mtu}
 }
 
 ################################################################################
@@ -168,23 +166,23 @@ validate_v4_exception()
 	local mtu=$2
 	local ping_sz=$3
 	local dst="172.16.10${i}.1"
-	local h0=172.16.100.1
-	local r1=172.16.100.254
+	local h0_ip=172.16.100.1
+	local r1_ip=172.16.100.254
 	local rc
 
 	if [ ${ping_sz} != "0" ]; then
-		run_cmd ip netns exec h0 ping -s ${ping_sz} -c5 -w5 ${dst}
+		run_cmd ip netns exec ${h0} ping -s ${ping_sz} -c5 -w5 ${dst}
 	fi
 
 	if [ "$VERBOSE" = "1" ]; then
 		echo "Route get"
-		ip -netns h0 ro get ${dst}
+		ip -netns ${h0} ro get ${dst}
 		echo "Searching for:"
 		echo "    cache .* mtu ${mtu}"
 		echo
 	fi
 
-	ip -netns h0 ro get ${dst} | \
+	ip -netns ${h0} ro get ${dst} | \
 	grep -q "cache .* mtu ${mtu}"
 	rc=$?
 
@@ -197,24 +195,24 @@ validate_v6_exception()
 	local mtu=$2
 	local ping_sz=$3
 	local dst="2001:db8:10${i}::1"
-	local h0=2001:db8:100::1
-	local r1=2001:db8:100::64
+	local h0_ip=2001:db8:100::1
+	local r1_ip=2001:db8:100::64
 	local rc
 
 	if [ ${ping_sz} != "0" ]; then
-		run_cmd ip netns exec h0 ${ping6} -s ${ping_sz} -c5 -w5 ${dst}
+		run_cmd ip netns exec ${h0} ${ping6} -s ${ping_sz} -c5 -w5 ${dst}
 	fi
 
 	if [ "$VERBOSE" = "1" ]; then
 		echo "Route get"
-		ip -netns h0 -6 ro get ${dst}
+		ip -netns ${h0} -6 ro get ${dst}
 		echo "Searching for:"
-		echo "    ${dst} from :: via ${r1} dev eth0 src ${h0} .* mtu ${mtu}"
+		echo "    ${dst}.* via ${r1_ip} dev eth0 src ${h0_ip} .* mtu ${mtu}"
 		echo
 	fi
 
-	ip -netns h0 -6 ro get ${dst} | \
-	grep -q "${dst} from :: via ${r1} dev eth0 src ${h0} .* mtu ${mtu}"
+	ip -netns ${h0} -6 ro get ${dst} | \
+	grep -q "${dst}.* via ${r1_ip} dev eth0 src ${h0_ip} .* mtu ${mtu}"
 	rc=$?
 
 	log_test $rc 0 "IPv6: host 0 to host ${i}, mtu ${mtu}"
@@ -242,11 +240,11 @@ for i in 1 2 3
 do
 	# generate a cached route per-cpu
 	for c in ${cpus}; do
-		run_cmd taskset -c ${c} ip netns exec h0 ping -c1 -w1 172.16.10${i}.1
-		[ $? -ne 0 ] && printf "\nERROR: ping to h${i} failed\n" && ret=1
+		run_cmd taskset -c ${c} ip netns exec ${h0} ping -c1 -w1 172.16.10${i}.1
+		[ $? -ne 0 ] && printf "\nERROR: ping to ${h[$i]} failed\n" && ret=1
 
-		run_cmd taskset -c ${c} ip netns exec h0 ${ping6} -c1 -w1 2001:db8:10${i}::1
-		[ $? -ne 0 ] && printf "\nERROR: ping6 to h${i} failed\n" && ret=1
+		run_cmd taskset -c ${c} ip netns exec ${h0} ${ping6} -c1 -w1 2001:db8:10${i}::1
+		[ $? -ne 0 ] && printf "\nERROR: ping6 to ${h[$i]} failed\n" && ret=1
 
 		[ $ret -ne 0 ] && break
 	done
@@ -282,11 +280,11 @@ if [ $ret -eq 0 ]; then
 	validate_v6_exception 3 1400 0
 
 	# targeted deletes to trigger cleanup paths in kernel
-	ip -netns h0 ro del 172.16.102.0/24 nhid 4
-	ip -netns h0 -6 ro del 2001:db8:102::/64 nhid 6
+	ip -netns ${h0} ro del 172.16.102.0/24 nhid 4
+	ip -netns ${h0} -6 ro del 2001:db8:102::/64 nhid 6
 
-	ip -netns h0 nexthop del id 4
-	ip -netns h0 nexthop del id 6
+	ip -netns ${h0} nexthop del id 4
+	ip -netns ${h0} nexthop del id 6
 fi
 
 cleanup
diff --git a/tools/testing/selftests/net/fib_nexthop_nongw.sh b/tools/testing/selftests/net/fib_nexthop_nongw.sh
new file mode 100755
index 000000000000..1ccf56f10171
--- /dev/null
+++ b/tools/testing/selftests/net/fib_nexthop_nongw.sh
@@ -0,0 +1,115 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# ns: h1               | ns: h2
+#   192.168.0.1/24     |
+#            eth0      |
+#                      |       192.168.1.1/32
+#            veth0 <---|---> veth1
+# Validate source address selection for route without gateway
+
+source lib.sh
+PAUSE_ON_FAIL=no
+VERBOSE=0
+ret=0
+
+################################################################################
+# helpers
+
+log_test()
+{
+	local rc=$1
+	local expected=$2
+	local msg="$3"
+
+	if [ ${rc} -eq ${expected} ]; then
+		printf "TEST: %-60s  [ OK ]\n" "${msg}"
+		nsuccess=$((nsuccess+1))
+	else
+		ret=1
+		nfail=$((nfail+1))
+		printf "TEST: %-60s  [FAIL]\n" "${msg}"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+			echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	fi
+
+	[ "$VERBOSE" = "1" ] && echo
+}
+
+run_cmd()
+{
+	local cmd="$*"
+	local out
+	local rc
+
+	if [ "$VERBOSE" = "1" ]; then
+		echo "COMMAND: $cmd"
+	fi
+
+	out=$(eval $cmd 2>&1)
+	rc=$?
+	if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+		echo "$out"
+	fi
+
+	[ "$VERBOSE" = "1" ] && echo
+
+	return $rc
+}
+
+################################################################################
+# config
+setup()
+{
+	setup_ns h1 h2
+
+	# Add a fake eth0 to support an ip address
+	ip -n $h1 link add name eth0 type dummy
+	ip -n $h1 link set eth0 up
+	ip -n $h1 address add 192.168.0.1/24 dev eth0
+
+	# Configure veths (same @mac, arp off)
+	ip -n $h1 link add name veth0 type veth peer name veth1 netns $h2
+	ip -n $h1 link set veth0 up
+
+	ip -n $h2 link set veth1 up
+
+	# Configure @IP in the peer netns
+	ip -n $h2 address add 192.168.1.1/32 dev veth1
+	ip -n $h2 route add default dev veth1
+
+	# Add a nexthop without @gw and use it in a route
+	ip -n $h1 nexthop add id 1 dev veth0
+	ip -n $h1 route add 192.168.1.1 nhid 1
+}
+
+cleanup()
+{
+	cleanup_ns $h1 $h2
+}
+
+trap cleanup EXIT
+
+################################################################################
+# main
+
+while getopts :pv o
+do
+	case $o in
+		p) PAUSE_ON_FAIL=yes;;
+		v) VERBOSE=1;;
+	esac
+done
+
+setup
+
+run_cmd ip -netns $h1 route get 192.168.1.1
+log_test $? 0 "nexthop: get route with nexthop without gw"
+run_cmd ip netns exec $h1 ping -c1 192.168.1.1
+log_test $? 0 "nexthop: ping through nexthop without gw"
+
+exit $ret
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index eb693a3b7b4a..ac0b2c6a5761 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -14,19 +14,52 @@
 # objects. Device reference counts and network namespace cleanup tested
 # by use of network namespace for peer.
 
+source lib.sh
 ret=0
 # Kselftest framework requirement - SKIP code is 4.
 ksft_skip=4
 
 # all tests in this script. Can be overridden with -t option
-IPV4_TESTS="ipv4_fcnal ipv4_grp_fcnal ipv4_withv6_fcnal ipv4_fcnal_runtime ipv4_large_grp ipv4_compat_mode ipv4_fdb_grp_fcnal ipv4_torture"
-IPV6_TESTS="ipv6_fcnal ipv6_grp_fcnal ipv6_fcnal_runtime ipv6_large_grp ipv6_compat_mode ipv6_fdb_grp_fcnal ipv6_torture"
-
-ALL_TESTS="basic ${IPV4_TESTS} ${IPV6_TESTS}"
+IPV4_TESTS="
+	ipv4_fcnal
+	ipv4_grp_fcnal
+	ipv4_res_grp_fcnal
+	ipv4_withv6_fcnal
+	ipv4_fcnal_runtime
+	ipv4_large_grp
+	ipv4_large_res_grp
+	ipv4_compat_mode
+	ipv4_fdb_grp_fcnal
+	ipv4_mpath_select
+	ipv4_torture
+	ipv4_res_torture
+"
+
+IPV6_TESTS="
+	ipv6_fcnal
+	ipv6_grp_fcnal
+	ipv6_res_grp_fcnal
+	ipv6_fcnal_runtime
+	ipv6_large_grp
+	ipv6_large_res_grp
+	ipv6_compat_mode
+	ipv6_fdb_grp_fcnal
+	ipv6_mpath_select
+	ipv6_torture
+	ipv6_res_torture
+"
+
+ALL_TESTS="
+	basic
+	basic_res
+	${IPV4_TESTS}
+	${IPV6_TESTS}
+"
 TESTS="${ALL_TESTS}"
 VERBOSE=0
 PAUSE_ON_FAIL=no
 PAUSE=no
+PING_TIMEOUT=5
 
 nsid=100
 
@@ -116,13 +149,7 @@ create_ns()
 {
 	local n=${1}
 
-	ip netns del ${n} 2>/dev/null
-
 	set -e
-	ip netns add ${n}
-	ip netns set ${n} $((nsid++))
-	ip -netns ${n} addr add 127.0.0.1/8 dev lo
-	ip -netns ${n} link set lo up
 
 	ip netns exec ${n} sysctl -qw net.ipv4.ip_forward=1
 	ip netns exec ${n} sysctl -qw net.ipv4.fib_multipath_use_neigh=1
@@ -141,12 +168,13 @@ setup()
 {
 	cleanup
 
-	create_ns me
-	create_ns peer
-	create_ns remote
+	setup_ns me peer remote
+	create_ns $me
+	create_ns $peer
+	create_ns $remote
 
-	IP="ip -netns me"
-	BRIDGE="bridge -netns me"
+	IP="ip -netns $me"
+	BRIDGE="bridge -netns $me"
 	set -e
 	$IP li add veth1 type veth peer name veth2
 	$IP li set veth1 up
@@ -158,24 +186,24 @@ setup()
 	$IP addr add 172.16.2.1/24 dev veth3
 	$IP -6 addr add 2001:db8:92::1/64 dev veth3 nodad
 
-	$IP li set veth2 netns peer up
-	ip -netns peer addr add 172.16.1.2/24 dev veth2
-	ip -netns peer -6 addr add 2001:db8:91::2/64 dev veth2 nodad
+	$IP li set veth2 netns $peer up
+	ip -netns $peer addr add 172.16.1.2/24 dev veth2
+	ip -netns $peer -6 addr add 2001:db8:91::2/64 dev veth2 nodad
 
-	$IP li set veth4 netns peer up
-	ip -netns peer addr add 172.16.2.2/24 dev veth4
-	ip -netns peer -6 addr add 2001:db8:92::2/64 dev veth4 nodad
+	$IP li set veth4 netns $peer up
+	ip -netns $peer addr add 172.16.2.2/24 dev veth4
+	ip -netns $peer -6 addr add 2001:db8:92::2/64 dev veth4 nodad
 
-	ip -netns remote li add veth5 type veth peer name veth6
-	ip -netns remote li set veth5 up
-	ip -netns remote addr add dev veth5 172.16.101.1/24
-	ip -netns remote -6 addr add dev veth5 2001:db8:101::1/64 nodad
-	ip -netns remote ro add 172.16.0.0/22 via 172.16.101.2
-	ip -netns remote -6 ro add 2001:db8:90::/40 via 2001:db8:101::2
+	ip -netns $remote li add veth5 type veth peer name veth6
+	ip -netns $remote li set veth5 up
+	ip -netns $remote addr add dev veth5 172.16.101.1/24
+	ip -netns $remote -6 addr add dev veth5 2001:db8:101::1/64 nodad
+	ip -netns $remote ro add 172.16.0.0/22 via 172.16.101.2
+	ip -netns $remote -6 ro add 2001:db8:90::/40 via 2001:db8:101::2
 
-	ip -netns remote li set veth6 netns peer up
-	ip -netns peer addr add dev veth6 172.16.101.2/24
-	ip -netns peer -6 addr add dev veth6 2001:db8:101::2/64 nodad
+	ip -netns $remote li set veth6 netns $peer up
+	ip -netns $peer addr add dev veth6 172.16.101.2/24
+	ip -netns $peer -6 addr add dev veth6 2001:db8:101::2/64 nodad
 	set +e
 }
 
@@ -183,7 +211,7 @@ cleanup()
 {
 	local ns
 
-	for ns in me peer remote; do
+	for ns in $me $peer $remote; do
 		ip netns del ${ns} 2>/dev/null
 	done
 }
@@ -232,6 +260,19 @@ check_nexthop()
 	check_output "${out}" "${expected}"
 }
 
+check_nexthop_bucket()
+{
+	local nharg="$1"
+	local expected="$2"
+	local out
+
+	# remove the idle time since we cannot match it
+	out=$($IP nexthop bucket ${nharg} \
+		| sed s/idle_time\ [0-9.]*\ // 2>/dev/null)
+
+	check_output "${out}" "${expected}"
+}
+
 check_route()
 {
 	local pfx="$1"
@@ -308,6 +349,46 @@ check_large_grp()
 	log_test $? 0 "Dump large (x$ecmp) ecmp groups"
 }
 
+check_large_res_grp()
+{
+	local ipv=$1
+	local buckets=$2
+	local ipstr=""
+
+	if [ $ipv -eq 4 ]; then
+		ipstr="172.16.1.2"
+	else
+		ipstr="2001:db8:91::2"
+	fi
+
+	# create a resilient group with $buckets buckets and dump them
+	run_cmd "$IP nexthop add id 100 via $ipstr dev veth1"
+	run_cmd "$IP nexthop add id 1000 group 100 type resilient buckets $buckets"
+	run_cmd "$IP nexthop bucket list"
+	log_test $? 0 "Dump large (x$buckets) nexthop buckets"
+}
+
+get_route_dev()
+{
+	local pfx="$1"
+	local out
+
+	if out=$($IP -j route get "$pfx" | jq -re ".[0].dev"); then
+		echo "$out"
+	fi
+}
+
+check_route_dev()
+{
+	local pfx="$1"
+	local expected="$2"
+	local out
+
+	out=$(get_route_dev "$pfx")
+
+	check_output "$out" "$expected"
+}
+
 start_ip_monitor()
 {
 	local mtype=$1
@@ -344,6 +425,15 @@ check_nexthop_fdb_support()
 	fi
 }
 
+check_nexthop_res_support()
+{
+	$IP nexthop help 2>&1 | grep -q resilient
+	if [ $? -ne 0 ]; then
+		echo "SKIP: iproute2 too old, missing resilient nexthop group support"
+		return $ksft_skip
+	fi
+}
+
 ipv6_fdb_grp_fcnal()
 {
 	local rc
@@ -504,6 +594,112 @@ ipv4_fdb_grp_fcnal()
 	$IP link del dev vx10
 }
 
+ipv4_mpath_select()
+{
+	local rc dev match h addr
+
+	echo
+	echo "IPv4 multipath selection"
+	echo "------------------------"
+	if [ ! -x "$(command -v jq)" ]; then
+		echo "SKIP: Could not run test; need jq tool"
+		return $ksft_skip
+	fi
+
+	# Use status of existing neighbor entry when determining nexthop for
+	# multipath routes.
+	local -A gws
+	gws=([veth1]=172.16.1.2 [veth3]=172.16.2.2)
+	local -A other_dev
+	other_dev=([veth1]=veth3 [veth3]=veth1)
+
+	run_cmd "$IP nexthop add id 1 via ${gws["veth1"]} dev veth1"
+	run_cmd "$IP nexthop add id 2 via ${gws["veth3"]} dev veth3"
+	run_cmd "$IP nexthop add id 1001 group 1/2"
+	run_cmd "$IP ro add 172.16.101.0/24 nhid 1001"
+	rc=0
+	for dev in veth1 veth3; do
+		match=0
+		for h in {1..254}; do
+			addr="172.16.101.$h"
+			if [ "$(get_route_dev "$addr")" = "$dev" ]; then
+				match=1
+				break
+			fi
+		done
+		if (( match == 0 )); then
+			echo "SKIP: Did not find a route using device $dev"
+			return $ksft_skip
+		fi
+		run_cmd "$IP neigh add ${gws[$dev]} dev $dev nud failed"
+		if ! check_route_dev "$addr" "${other_dev[$dev]}"; then
+			rc=1
+			break
+		fi
+		run_cmd "$IP neigh del ${gws[$dev]} dev $dev"
+	done
+	log_test $rc 0 "Use valid neighbor during multipath selection"
+
+	run_cmd "$IP neigh add 172.16.1.2 dev veth1 nud incomplete"
+	run_cmd "$IP neigh add 172.16.2.2 dev veth3 nud incomplete"
+	run_cmd "$IP route get 172.16.101.1"
+	# if we did not crash, success
+	log_test $rc 0 "Multipath selection with no valid neighbor"
+}
+
+ipv6_mpath_select()
+{
+	local rc dev match h addr
+
+	echo
+	echo "IPv6 multipath selection"
+	echo "------------------------"
+	if [ ! -x "$(command -v jq)" ]; then
+		echo "SKIP: Could not run test; need jq tool"
+		return $ksft_skip
+	fi
+
+	# Use status of existing neighbor entry when determining nexthop for
+	# multipath routes.
+	local -A gws
+	gws=([veth1]=2001:db8:91::2 [veth3]=2001:db8:92::2)
+	local -A other_dev
+	other_dev=([veth1]=veth3 [veth3]=veth1)
+
+	run_cmd "$IP nexthop add id 1 via ${gws["veth1"]} dev veth1"
+	run_cmd "$IP nexthop add id 2 via ${gws["veth3"]} dev veth3"
+	run_cmd "$IP nexthop add id 1001 group 1/2"
+	run_cmd "$IP ro add 2001:db8:101::/64 nhid 1001"
+	rc=0
+	for dev in veth1 veth3; do
+		match=0
+		for h in {1..65535}; do
+			addr=$(printf "2001:db8:101::%x" $h)
+			if [ "$(get_route_dev "$addr")" = "$dev" ]; then
+				match=1
+				break
+			fi
+		done
+		if (( match == 0 )); then
+			echo "SKIP: Did not find a route using device $dev"
+			return $ksft_skip
+		fi
+		run_cmd "$IP neigh add ${gws[$dev]} dev $dev nud failed"
+		if ! check_route_dev "$addr" "${other_dev[$dev]}"; then
+			rc=1
+			break
+		fi
+		run_cmd "$IP neigh del ${gws[$dev]} dev $dev"
+	done
+	log_test $rc 0 "Use valid neighbor during multipath selection"
+
+	run_cmd "$IP neigh add 2001:db8:91::2 dev veth1 nud incomplete"
+	run_cmd "$IP neigh add 2001:db8:92::2 dev veth3 nud incomplete"
+	run_cmd "$IP route get 2001:db8:101::1"
+	# if we did not crash, success
+	log_test $rc 0 "Multipath selection with no valid neighbor"
+}
+
 ################################################################################
 # basic operations (add, delete, replace) on nexthops and nexthop groups
 #
@@ -559,6 +755,66 @@ ipv6_fcnal()
 	log_test $? 0 "Nexthops removed on admin down"
 }
 
+ipv6_grp_refs()
+{
+	if [ ! -x "$(command -v mausezahn)" ]; then
+		echo "SKIP: Could not run test; need mausezahn tool"
+		return
+	fi
+
+	run_cmd "$IP link set dev veth1 up"
+	run_cmd "$IP link add veth1.10 link veth1 up type vlan id 10"
+	run_cmd "$IP link add veth1.20 link veth1 up type vlan id 20"
+	run_cmd "$IP -6 addr add 2001:db8:91::1/64 dev veth1.10"
+	run_cmd "$IP -6 addr add 2001:db8:92::1/64 dev veth1.20"
+	run_cmd "$IP -6 neigh add 2001:db8:91::2 lladdr 00:11:22:33:44:55 dev veth1.10"
+	run_cmd "$IP -6 neigh add 2001:db8:92::2 lladdr 00:11:22:33:44:55 dev veth1.20"
+	run_cmd "$IP nexthop add id 100 via 2001:db8:91::2 dev veth1.10"
+	run_cmd "$IP nexthop add id 101 via 2001:db8:92::2 dev veth1.20"
+	run_cmd "$IP nexthop add id 102 group 100"
+	run_cmd "$IP route add 2001:db8:101::1/128 nhid 102"
+
+	# create per-cpu dsts through nh 100
+	run_cmd "ip netns exec $me mausezahn -6 veth1.10 -B 2001:db8:101::1 -A 2001:db8:91::1 -c 5 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1"
+
+	# remove nh 100 from the group to delete the route potentially leaving
+	# a stale per-cpu dst which holds a reference to the nexthop's net
+	# device and to the IPv6 route
+	run_cmd "$IP nexthop replace id 102 group 101"
+	run_cmd "$IP route del 2001:db8:101::1/128"
+
+	# add both nexthops to the group so a reference is taken on them
+	run_cmd "$IP nexthop replace id 102 group 100/101"
+
+	# if the bug described in commit "net: nexthop: release IPv6 per-cpu
+	# dsts when replacing a nexthop group" exists at this point we have
+	# an unlinked IPv6 route (but not freed due to stale dst) with a
+	# reference over the group so we delete the group which will again
+	# only unlink it due to the route reference
+	run_cmd "$IP nexthop del id 102"
+
+	# delete the nexthop with stale dst, since we have an unlinked
+	# group with a ref to it and an unlinked IPv6 route with ref to the
+	# group, the nh will only be unlinked and not freed so the stale dst
+	# remains forever and we get a net device refcount imbalance
+	run_cmd "$IP nexthop del id 100"
+
+	# if a reference was lost this command will hang because the net device
+	# cannot be removed
+	timeout -s KILL 5 ip netns exec $me ip link del veth1.10 >/dev/null 2>&1
+
+	# we can't cleanup if the command is hung trying to delete the netdev
+	if [ $? -eq 137 ]; then
+		return 1
+	fi
+
+	# cleanup
+	run_cmd "$IP link del veth1.20"
+	run_cmd "$IP nexthop flush"
+
+	return 0
+}
+
 ipv6_grp_fcnal()
 {
 	local rc
@@ -664,6 +920,73 @@ ipv6_grp_fcnal()
 
 	run_cmd "$IP nexthop add id 108 group 31/24"
 	log_test $? 2 "Nexthop group can not have a blackhole and another nexthop"
+
+	ipv6_grp_refs
+	log_test $? 0 "Nexthop group replace refcounts"
+}
+
+ipv6_res_grp_fcnal()
+{
+	local rc
+
+	echo
+	echo "IPv6 resilient groups functional"
+	echo "--------------------------------"
+
+	check_nexthop_res_support
+	if [ $? -eq $ksft_skip ]; then
+		return $ksft_skip
+	fi
+
+	#
+	# migration of nexthop buckets - equal weights
+	#
+	run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1"
+	run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+	run_cmd "$IP nexthop add id 102 group 62/63 type resilient buckets 2 idle_timer 0"
+
+	run_cmd "$IP nexthop del id 63"
+	check_nexthop "id 102" \
+		"id 102 group 62 type resilient buckets 2 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+	log_test $? 0 "Nexthop group updated when entry is deleted"
+	check_nexthop_bucket "list id 102" \
+		"id 102 index 0 nhid 62 id 102 index 1 nhid 62"
+	log_test $? 0 "Nexthop buckets updated when entry is deleted"
+
+	run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+	run_cmd "$IP nexthop replace id 102 group 62/63 type resilient buckets 2 idle_timer 0"
+	check_nexthop "id 102" \
+		"id 102 group 62/63 type resilient buckets 2 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+	log_test $? 0 "Nexthop group updated after replace"
+	check_nexthop_bucket "list id 102" \
+		"id 102 index 0 nhid 63 id 102 index 1 nhid 62"
+	log_test $? 0 "Nexthop buckets updated after replace"
+
+	$IP nexthop flush >/dev/null 2>&1
+
+	#
+	# migration of nexthop buckets - unequal weights
+	#
+	run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1"
+	run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+	run_cmd "$IP nexthop add id 102 group 62,3/63,1 type resilient buckets 4 idle_timer 0"
+
+	run_cmd "$IP nexthop del id 63"
+	check_nexthop "id 102" \
+		"id 102 group 62,3 type resilient buckets 4 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+	log_test $? 0 "Nexthop group updated when entry is deleted - nECMP"
+	check_nexthop_bucket "list id 102" \
+		"id 102 index 0 nhid 62 id 102 index 1 nhid 62 id 102 index 2 nhid 62 id 102 index 3 nhid 62"
+	log_test $? 0 "Nexthop buckets updated when entry is deleted - nECMP"
+
+	run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+	run_cmd "$IP nexthop replace id 102 group 62,3/63,1 type resilient buckets 4 idle_timer 0"
+	check_nexthop "id 102" \
+		"id 102 group 62,3/63 type resilient buckets 4 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+	log_test $? 0 "Nexthop group updated after replace - nECMP"
+	check_nexthop_bucket "list id 102" \
+		"id 102 index 0 nhid 63 id 102 index 1 nhid 62 id 102 index 2 nhid 62 id 102 index 3 nhid 62"
+	log_test $? 0 "Nexthop buckets updated after replace - nECMP"
 }
 
 ipv6_fcnal_runtime()
@@ -685,13 +1008,13 @@ ipv6_fcnal_runtime()
 	log_test $? 0 "Route delete"
 
 	run_cmd "$IP ro add 2001:db8:101::1/128 nhid 81"
-	run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+	run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
 	log_test $? 0 "Ping with nexthop"
 
 	run_cmd "$IP nexthop add id 82 via 2001:db8:92::2 dev veth3"
 	run_cmd "$IP nexthop add id 122 group 81/82"
 	run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 122"
-	run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+	run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
 	log_test $? 0 "Ping - multipath"
 
 	#
@@ -699,26 +1022,26 @@ ipv6_fcnal_runtime()
 	#
 	run_cmd "$IP -6 nexthop add id 83 blackhole"
 	run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 83"
-	run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+	run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
 	log_test $? 2 "Ping - blackhole"
 
 	run_cmd "$IP nexthop replace id 83 via 2001:db8:91::2 dev veth1"
-	run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+	run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
 	log_test $? 0 "Ping - blackhole replaced with gateway"
 
 	run_cmd "$IP -6 nexthop replace id 83 blackhole"
-	run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+	run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
 	log_test $? 2 "Ping - gateway replaced by blackhole"
 
 	run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 122"
-	run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+	run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
 	if [ $? -eq 0 ]; then
 		run_cmd "$IP nexthop replace id 122 group 83"
-		run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+		run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
 		log_test $? 2 "Ping - group with blackhole"
 
 		run_cmd "$IP nexthop replace id 122 group 81/82"
-		run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+		run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
 		log_test $? 0 "Ping - group blackhole replaced with gateways"
 	else
 		log_test 2 0 "Ping - multipath failed"
@@ -791,17 +1114,25 @@ ipv6_fcnal_runtime()
 	run_cmd "$IP nexthop add id 86 via 2001:db8:91::2 dev veth1"
 	run_cmd "$IP ro add 2001:db8:101::1/128 nhid 81"
 
+	# route can not use prefsrc with nexthops
+	run_cmd "$IP ro add 2001:db8:101::2/128 nhid 86 from 2001:db8:91::1"
+	log_test $? 2 "IPv6 route can not use src routing with external nexthop"
+
+	# check cleanup path on invalid metric
+	run_cmd "$IP ro add 2001:db8:101::2/128 nhid 86 congctl lock foo"
+	log_test $? 2 "IPv6 route with invalid metric"
+
 	# rpfilter and default route
 	$IP nexthop flush >/dev/null 2>&1
-	run_cmd "ip netns exec me ip6tables -t mangle -I PREROUTING 1 -m rpfilter --invert -j DROP"
+	run_cmd "ip netns exec $me ip6tables -t mangle -I PREROUTING 1 -m rpfilter --invert -j DROP"
 	run_cmd "$IP nexthop add id 91 via 2001:db8:91::2 dev veth1"
 	run_cmd "$IP nexthop add id 92 via 2001:db8:92::2 dev veth3"
 	run_cmd "$IP nexthop add id 93 group 91/92"
 	run_cmd "$IP -6 ro add default nhid 91"
-	run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+	run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
 	log_test $? 0 "Nexthop with default route and rpfilter"
 	run_cmd "$IP -6 ro replace default nhid 93"
-	run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+	run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
 	log_test $? 0 "Nexthop with multipath default route and rpfilter"
 
 	# TO-DO:
@@ -824,6 +1155,22 @@ ipv6_large_grp()
 	$IP nexthop flush >/dev/null 2>&1
 }
 
+ipv6_large_res_grp()
+{
+	echo
+	echo "IPv6 large resilient group (128k buckets)"
+	echo "-----------------------------------------"
+
+	check_nexthop_res_support
+	if [ $? -eq $ksft_skip ]; then
+		return $ksft_skip
+	fi
+
+	check_large_res_grp 6 $((128 * 1024))
+
+	$IP nexthop flush >/dev/null 2>&1
+}
+
 ipv6_del_add_loop1()
 {
 	while :; do
@@ -865,20 +1212,76 @@ ipv6_torture()
 	pid1=$!
 	ipv6_grp_replace_loop &
 	pid2=$!
-	ip netns exec me ping -f 2001:db8:101::1 >/dev/null 2>&1 &
+	ip netns exec $me ping -f 2001:db8:101::1 >/dev/null 2>&1 &
 	pid3=$!
-	ip netns exec me ping -f 2001:db8:101::2 >/dev/null 2>&1 &
+	ip netns exec $me ping -f 2001:db8:101::2 >/dev/null 2>&1 &
 	pid4=$!
-	ip netns exec me mausezahn veth1 -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 &
+	ip netns exec $me mausezahn -6 veth1 -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 &
 	pid5=$!
 
 	sleep 300
 	kill -9 $pid1 $pid2 $pid3 $pid4 $pid5
+	wait $pid1 $pid2 $pid3 $pid4 $pid5 2>/dev/null
 
 	# if we did not crash, success
 	log_test 0 0 "IPv6 torture test"
 }
 
+ipv6_res_grp_replace_loop()
+{
+	while :; do
+		$IP nexthop replace id 102 group 100/101 type resilient
+	done >/dev/null 2>&1
+}
+
+ipv6_res_torture()
+{
+	local pid1
+	local pid2
+	local pid3
+	local pid4
+	local pid5
+
+	echo
+	echo "IPv6 runtime resilient nexthop group torture"
+	echo "--------------------------------------------"
+
+	check_nexthop_res_support
+	if [ $? -eq $ksft_skip ]; then
+		return $ksft_skip
+	fi
+
+	if [ ! -x "$(command -v mausezahn)" ]; then
+		echo "SKIP: Could not run test; need mausezahn tool"
+		return
+	fi
+
+	run_cmd "$IP nexthop add id 100 via 2001:db8:91::2 dev veth1"
+	run_cmd "$IP nexthop add id 101 via 2001:db8:92::2 dev veth3"
+	run_cmd "$IP nexthop add id 102 group 100/101 type resilient buckets 512 idle_timer 0"
+	run_cmd "$IP route add 2001:db8:101::1 nhid 102"
+	run_cmd "$IP route add 2001:db8:101::2 nhid 102"
+
+	ipv6_del_add_loop1 &
+	pid1=$!
+	ipv6_res_grp_replace_loop &
+	pid2=$!
+	ip netns exec $me ping -f 2001:db8:101::1 >/dev/null 2>&1 &
+	pid3=$!
+	ip netns exec $me ping -f 2001:db8:101::2 >/dev/null 2>&1 &
+	pid4=$!
+	ip netns exec $me mausezahn -6 veth1 \
+			    -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 \
+			    -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 &
+	pid5=$!
+
+	sleep 300
+	kill -9 $pid1 $pid2 $pid3 $pid4 $pid5
+	wait $pid1 $pid2 $pid3 $pid4 $pid5 2>/dev/null
+
+	# if we did not crash, success
+	log_test 0 0 "IPv6 resilient nexthop group torture test"
+}
 
 ipv4_fcnal()
 {
@@ -931,6 +1334,36 @@ ipv4_fcnal()
 	set +e
 	check_nexthop "dev veth1" ""
 	log_test $? 0 "Nexthops removed on admin down"
+
+	# nexthop route delete warning: route add with nhid and delete
+	# using device
+	run_cmd "$IP li set dev veth1 up"
+	run_cmd "$IP nexthop add id 12 via 172.16.1.3 dev veth1"
+	out1=`dmesg | grep "WARNING:.*fib_nh_match.*" | wc -l`
+	run_cmd "$IP route add 172.16.101.1/32 nhid 12"
+	run_cmd "$IP route delete 172.16.101.1/32 dev veth1"
+	out2=`dmesg | grep "WARNING:.*fib_nh_match.*" | wc -l`
+	[ $out1 -eq $out2 ]
+	rc=$?
+	log_test $rc 0 "Delete nexthop route warning"
+	run_cmd "$IP route delete 172.16.101.1/32 nhid 12"
+	run_cmd "$IP nexthop del id 12"
+
+	run_cmd "$IP nexthop add id 21 via 172.16.1.6 dev veth1"
+	run_cmd "$IP ro add 172.16.101.0/24 nhid 21"
+	run_cmd "$IP ro del 172.16.101.0/24 nexthop via 172.16.1.7 dev veth1 nexthop via 172.16.1.8 dev veth1"
+	log_test $? 2 "Delete multipath route with only nh id based entry"
+
+	run_cmd "$IP nexthop add id 22 via 172.16.1.6 dev veth1"
+	run_cmd "$IP ro add 172.16.102.0/24 nhid 22"
+	run_cmd "$IP ro del 172.16.102.0/24 dev veth1"
+	log_test $? 2 "Delete route when specifying only nexthop device"
+
+	run_cmd "$IP ro del 172.16.102.0/24 via 172.16.1.6"
+	log_test $? 2 "Delete route when specifying only gateway"
+
+	run_cmd "$IP ro del 172.16.102.0/24"
+	log_test $? 0 "Delete route when not specifying nexthop attributes"
 }
 
 ipv4_grp_fcnal()
@@ -1038,12 +1471,76 @@ ipv4_grp_fcnal()
 	log_test $? 2 "Nexthop group can not have a blackhole and another nexthop"
 }
 
+ipv4_res_grp_fcnal()
+{
+	local rc
+
+	echo
+	echo "IPv4 resilient groups functional"
+	echo "--------------------------------"
+
+	check_nexthop_res_support
+	if [ $? -eq $ksft_skip ]; then
+		return $ksft_skip
+	fi
+
+	#
+	# migration of nexthop buckets - equal weights
+	#
+	run_cmd "$IP nexthop add id 12 via 172.16.1.2 dev veth1"
+	run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1"
+	run_cmd "$IP nexthop add id 102 group 12/13 type resilient buckets 2 idle_timer 0"
+
+	run_cmd "$IP nexthop del id 13"
+	check_nexthop "id 102" \
+		"id 102 group 12 type resilient buckets 2 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+	log_test $? 0 "Nexthop group updated when entry is deleted"
+	check_nexthop_bucket "list id 102" \
+		"id 102 index 0 nhid 12 id 102 index 1 nhid 12"
+	log_test $? 0 "Nexthop buckets updated when entry is deleted"
+
+	run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1"
+	run_cmd "$IP nexthop replace id 102 group 12/13 type resilient buckets 2 idle_timer 0"
+	check_nexthop "id 102" \
+		"id 102 group 12/13 type resilient buckets 2 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+	log_test $? 0 "Nexthop group updated after replace"
+	check_nexthop_bucket "list id 102" \
+		"id 102 index 0 nhid 13 id 102 index 1 nhid 12"
+	log_test $? 0 "Nexthop buckets updated after replace"
+
+	$IP nexthop flush >/dev/null 2>&1
+
+	#
+	# migration of nexthop buckets - unequal weights
+	#
+	run_cmd "$IP nexthop add id 12 via 172.16.1.2 dev veth1"
+	run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1"
+	run_cmd "$IP nexthop add id 102 group 12,3/13,1 type resilient buckets 4 idle_timer 0"
+
+	run_cmd "$IP nexthop del id 13"
+	check_nexthop "id 102" \
+		"id 102 group 12,3 type resilient buckets 4 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+	log_test $? 0 "Nexthop group updated when entry is deleted - nECMP"
+	check_nexthop_bucket "list id 102" \
+		"id 102 index 0 nhid 12 id 102 index 1 nhid 12 id 102 index 2 nhid 12 id 102 index 3 nhid 12"
+	log_test $? 0 "Nexthop buckets updated when entry is deleted - nECMP"
+
+	run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1"
+	run_cmd "$IP nexthop replace id 102 group 12,3/13,1 type resilient buckets 4 idle_timer 0"
+	check_nexthop "id 102" \
+		"id 102 group 12,3/13 type resilient buckets 4 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+	log_test $? 0 "Nexthop group updated after replace - nECMP"
+	check_nexthop_bucket "list id 102" \
+		"id 102 index 0 nhid 13 id 102 index 1 nhid 12 id 102 index 2 nhid 12 id 102 index 3 nhid 12"
+	log_test $? 0 "Nexthop buckets updated after replace - nECMP"
+}
+
 ipv4_withv6_fcnal()
 {
 	local lladdr
 
 	set -e
-	lladdr=$(get_linklocal veth2 peer)
+	lladdr=$(get_linklocal veth2 $peer)
 	run_cmd "$IP nexthop add id 11 via ${lladdr} dev veth1"
 	set +e
 	run_cmd "$IP ro add 172.16.101.1/32 nhid 11"
@@ -1096,18 +1593,22 @@ ipv4_fcnal_runtime()
 	run_cmd "$IP nexthop replace id 22 via 172.16.2.2 dev veth3"
 	log_test $? 2 "Nexthop replace with invalid scope for existing route"
 
+	# check cleanup path on invalid metric
+	run_cmd "$IP ro add 172.16.101.2/32 nhid 22 congctl lock foo"
+	log_test $? 2 "IPv4 route with invalid metric"
+
 	#
 	# add route with nexthop and check traffic
 	#
 	run_cmd "$IP nexthop replace id 21 via 172.16.1.2 dev veth1"
 	run_cmd "$IP ro replace 172.16.101.1/32 nhid 21"
-	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
 	log_test $? 0 "Basic ping"
 
 	run_cmd "$IP nexthop replace id 22 via 172.16.2.2 dev veth3"
 	run_cmd "$IP nexthop add id 122 group 21/22"
 	run_cmd "$IP ro replace 172.16.101.1/32 nhid 122"
-	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
 	log_test $? 0 "Ping - multipath"
 
 	run_cmd "$IP ro delete 172.16.101.1/32 nhid 122"
@@ -1118,7 +1619,7 @@ ipv4_fcnal_runtime()
 	run_cmd "$IP nexthop add id 501 via 172.16.1.2 dev veth1"
 	run_cmd "$IP ro add default nhid 501"
 	run_cmd "$IP ro add default via 172.16.1.3 dev veth1 metric 20"
-	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
 	log_test $? 0 "Ping - multiple default routes, nh first"
 
 	# flip the order
@@ -1127,7 +1628,7 @@ ipv4_fcnal_runtime()
 	run_cmd "$IP ro add default via 172.16.1.2 dev veth1 metric 20"
 	run_cmd "$IP nexthop replace id 501 via 172.16.1.3 dev veth1"
 	run_cmd "$IP ro add default nhid 501 metric 20"
-	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
 	log_test $? 0 "Ping - multiple default routes, nh second"
 
 	run_cmd "$IP nexthop delete nhid 501"
@@ -1138,26 +1639,26 @@ ipv4_fcnal_runtime()
 	#
 	run_cmd "$IP nexthop add id 23 blackhole"
 	run_cmd "$IP ro replace 172.16.101.1/32 nhid 23"
-	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
 	log_test $? 2 "Ping - blackhole"
 
 	run_cmd "$IP nexthop replace id 23 via 172.16.1.2 dev veth1"
-	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
 	log_test $? 0 "Ping - blackhole replaced with gateway"
 
 	run_cmd "$IP nexthop replace id 23 blackhole"
-	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
 	log_test $? 2 "Ping - gateway replaced by blackhole"
 
 	run_cmd "$IP ro replace 172.16.101.1/32 nhid 122"
-	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
 	if [ $? -eq 0 ]; then
 		run_cmd "$IP nexthop replace id 122 group 23"
-		run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+		run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
 		log_test $? 2 "Ping - group with blackhole"
 
 		run_cmd "$IP nexthop replace id 122 group 21/22"
-		run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+		run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
 		log_test $? 0 "Ping - group blackhole replaced with gateways"
 	else
 		log_test 2 0 "Ping - multipath failed"
@@ -1180,11 +1681,11 @@ ipv4_fcnal_runtime()
 	# IPv4 with IPv6
 	#
 	set -e
-	lladdr=$(get_linklocal veth2 peer)
+	lladdr=$(get_linklocal veth2 $peer)
 	run_cmd "$IP nexthop add id 24 via ${lladdr} dev veth1"
 	set +e
 	run_cmd "$IP ro replace 172.16.101.1/32 nhid 24"
-	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
 	log_test $? 0 "IPv6 nexthop with IPv4 route"
 
 	$IP neigh sh | grep -q "${lladdr} dev veth1"
@@ -1208,11 +1709,11 @@ ipv4_fcnal_runtime()
 
 	check_route "172.16.101.1" "172.16.101.1 nhid 101 nexthop via inet6 ${lladdr} dev veth1 weight 1 nexthop via 172.16.1.2 dev veth1 weight 1"
 
-	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
 	log_test $? 0 "IPv6 nexthop with IPv4 route"
 
 	run_cmd "$IP ro replace 172.16.101.1/32 via inet6 ${lladdr} dev veth1"
-	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
 	log_test $? 0 "IPv4 route with IPv6 gateway"
 
 	$IP neigh sh | grep -q "${lladdr} dev veth1"
@@ -1229,7 +1730,7 @@ ipv4_fcnal_runtime()
 
 	run_cmd "$IP ro del 172.16.101.1/32 via inet6 ${lladdr} dev veth1"
 	run_cmd "$IP -4 ro add default via inet6 ${lladdr} dev veth1"
-	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
 	log_test $? 0 "IPv4 default route with IPv6 gateway"
 
 	#
@@ -1259,12 +1760,28 @@ ipv4_large_grp()
 	$IP nexthop flush >/dev/null 2>&1
 }
 
+ipv4_large_res_grp()
+{
+	echo
+	echo "IPv4 large resilient group (128k buckets)"
+	echo "-----------------------------------------"
+
+	check_nexthop_res_support
+	if [ $? -eq $ksft_skip ]; then
+		return $ksft_skip
+	fi
+
+	check_large_res_grp 4 $((128 * 1024))
+
+	$IP nexthop flush >/dev/null 2>&1
+}
+
 sysctl_nexthop_compat_mode_check()
 {
 	local sysctlname="net.ipv4.nexthop_compat_mode"
 	local lprefix=$1
 
-	IPE="ip netns exec me"
+	IPE="ip netns exec $me"
 
 	$IPE sysctl -q $sysctlname 2>&1 >/dev/null
 	if [ $? -ne 0 ]; then
@@ -1283,7 +1800,7 @@ sysctl_nexthop_compat_mode_set()
 	local mode=$1
 	local lprefix=$2
 
-	IPE="ip netns exec me"
+	IPE="ip netns exec $me"
 
 	out=$($IPE sysctl -w $sysctlname=$mode)
 	log_test $? 0 "$lprefix set compat mode - $mode"
@@ -1467,20 +1984,77 @@ ipv4_torture()
 	pid1=$!
 	ipv4_grp_replace_loop &
 	pid2=$!
-	ip netns exec me ping -f 172.16.101.1 >/dev/null 2>&1 &
+	ip netns exec $me ping -f 172.16.101.1 >/dev/null 2>&1 &
 	pid3=$!
-	ip netns exec me ping -f 172.16.101.2 >/dev/null 2>&1 &
+	ip netns exec $me ping -f 172.16.101.2 >/dev/null 2>&1 &
 	pid4=$!
-	ip netns exec me mausezahn veth1 -B 172.16.101.2 -A 172.16.1.1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 &
+	ip netns exec $me mausezahn veth1 -B 172.16.101.2 -A 172.16.1.1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 &
 	pid5=$!
 
 	sleep 300
 	kill -9 $pid1 $pid2 $pid3 $pid4 $pid5
+	wait $pid1 $pid2 $pid3 $pid4 $pid5 2>/dev/null
 
 	# if we did not crash, success
 	log_test 0 0 "IPv4 torture test"
 }
 
+ipv4_res_grp_replace_loop()
+{
+	while :; do
+		$IP nexthop replace id 102 group 100/101 type resilient
+	done >/dev/null 2>&1
+}
+
+ipv4_res_torture()
+{
+	local pid1
+	local pid2
+	local pid3
+	local pid4
+	local pid5
+
+	echo
+	echo "IPv4 runtime resilient nexthop group torture"
+	echo "--------------------------------------------"
+
+	check_nexthop_res_support
+	if [ $? -eq $ksft_skip ]; then
+		return $ksft_skip
+	fi
+
+	if [ ! -x "$(command -v mausezahn)" ]; then
+		echo "SKIP: Could not run test; need mausezahn tool"
+		return
+	fi
+
+	run_cmd "$IP nexthop add id 100 via 172.16.1.2 dev veth1"
+	run_cmd "$IP nexthop add id 101 via 172.16.2.2 dev veth3"
+	run_cmd "$IP nexthop add id 102 group 100/101 type resilient buckets 512 idle_timer 0"
+	run_cmd "$IP route add 172.16.101.1 nhid 102"
+	run_cmd "$IP route add 172.16.101.2 nhid 102"
+
+	ipv4_del_add_loop1 &
+	pid1=$!
+	ipv4_res_grp_replace_loop &
+	pid2=$!
+	ip netns exec $me ping -f 172.16.101.1 >/dev/null 2>&1 &
+	pid3=$!
+	ip netns exec $me ping -f 172.16.101.2 >/dev/null 2>&1 &
+	pid4=$!
+	ip netns exec $me mausezahn veth1 \
+				-B 172.16.101.2 -A 172.16.1.1 -c 0 \
+				-t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 &
+	pid5=$!
+
+	sleep 300
+	kill -9 $pid1 $pid2 $pid3 $pid4 $pid5
+	wait $pid1 $pid2 $pid3 $pid4 $pid5 2>/dev/null
+
+	# if we did not crash, success
+	log_test 0 0 "IPv4 resilient nexthop group torture test"
+}
+
 basic()
 {
 	echo
@@ -1492,6 +2066,12 @@ basic()
 	run_cmd "$IP nexthop get id 1"
 	log_test $? 2 "Nexthop get on non-existent id"
 
+	run_cmd "$IP nexthop del id 1"
+	log_test $? 2 "Nexthop del with non-existent id"
+
+	run_cmd "$IP nexthop del id 1 group 1/2/3/4/5/6/7/8"
+	log_test $? 2 "Nexthop del with non-existent id and extra attributes"
+
 	# attempt to create nh without a device or gw - fails
 	run_cmd "$IP nexthop add id 1"
 	log_test $? 2 "Nexthop with no device or gateway"
@@ -1503,10 +2083,10 @@ basic()
 
 	# create nh with linkdown device - fails
 	$IP li set veth1 up
-	ip -netns peer li set veth2 down
+	ip -netns $peer li set veth2 down
 	run_cmd "$IP nexthop add id 1 dev veth1"
 	log_test $? 2 "Nexthop with device that is linkdown"
-	ip -netns peer li set veth2 up
+	ip -netns $peer li set veth2 up
 
 	# device only
 	run_cmd "$IP nexthop add id 1 dev veth1"
@@ -1524,6 +2104,19 @@ basic()
 	run_cmd "$IP nexthop replace id 2 blackhole dev veth1"
 	log_test $? 2 "Blackhole nexthop with other attributes"
 
+	# blackhole nexthop should not be affected by the state of the loopback
+	# device
+	run_cmd "$IP link set dev lo down"
+	check_nexthop "id 2" "id 2 blackhole"
+	log_test $? 0 "Blackhole nexthop with loopback device down"
+
+	run_cmd "$IP link set dev lo up"
+
+	# Dump should not loop endlessly when maximum nexthop ID is configured.
+	run_cmd "$IP nexthop add id $((2**32-1)) blackhole"
+	run_cmd "timeout 5 $IP nexthop"
+	log_test $? 0 "Maximum nexthop ID dump"
+
 	#
 	# groups
 	#
@@ -1582,6 +2175,225 @@ basic()
 	log_test $? 2 "Nexthop group and blackhole"
 
 	$IP nexthop flush >/dev/null 2>&1
+
+	# Test to ensure that flushing with a multi-part nexthop dump works as
+	# expected.
+	local batch_file=$(mktemp)
+
+	for i in $(seq 1 $((64 * 1024))); do
+		echo "nexthop add id $i blackhole" >> $batch_file
+	done
+
+	$IP -b $batch_file
+	$IP nexthop flush >/dev/null 2>&1
+	[[ $($IP nexthop | wc -l) -eq 0 ]]
+	log_test $? 0 "Large scale nexthop flushing"
+
+	rm $batch_file
+}
+
+check_nexthop_buckets_balance()
+{
+	local nharg=$1; shift
+	local ret
+
+	while (($# > 0)); do
+		local selector=$1; shift
+		local condition=$1; shift
+		local count
+
+		count=$($IP -j nexthop bucket ${nharg} ${selector} | jq length)
+		(( $count $condition ))
+		ret=$?
+		if ((ret != 0)); then
+			return $ret
+		fi
+	done
+
+	return 0
+}
+
+basic_res()
+{
+	echo
+	echo "Basic resilient nexthop group functional tests"
+	echo "----------------------------------------------"
+
+	check_nexthop_res_support
+	if [ $? -eq $ksft_skip ]; then
+		return $ksft_skip
+	fi
+
+	run_cmd "$IP nexthop add id 1 dev veth1"
+
+	#
+	# resilient nexthop group addition
+	#
+
+	run_cmd "$IP nexthop add id 101 group 1 type resilient buckets 8"
+	log_test $? 0 "Add a nexthop group with default parameters"
+
+	run_cmd "$IP nexthop get id 101"
+	check_nexthop "id 101" \
+		"id 101 group 1 type resilient buckets 8 idle_timer 120 unbalanced_timer 0 unbalanced_time 0"
+	log_test $? 0 "Get a nexthop group with default parameters"
+
+	run_cmd "$IP nexthop add id 102 group 1 type resilient
+			buckets 4 idle_timer 100 unbalanced_timer 5"
+	run_cmd "$IP nexthop get id 102"
+	check_nexthop "id 102" \
+		"id 102 group 1 type resilient buckets 4 idle_timer 100 unbalanced_timer 5 unbalanced_time 0"
+	log_test $? 0 "Get a nexthop group with non-default parameters"
+
+	run_cmd "$IP nexthop add id 103 group 1 type resilient buckets 0"
+	log_test $? 2 "Add a nexthop group with 0 buckets"
+
+	#
+	# resilient nexthop group replacement
+	#
+
+	run_cmd "$IP nexthop replace id 101 group 1 type resilient
+			buckets 8 idle_timer 240 unbalanced_timer 80"
+	log_test $? 0 "Replace nexthop group parameters"
+	check_nexthop "id 101" \
+		"id 101 group 1 type resilient buckets 8 idle_timer 240 unbalanced_timer 80 unbalanced_time 0"
+	log_test $? 0 "Get a nexthop group after replacing parameters"
+
+	run_cmd "$IP nexthop replace id 101 group 1 type resilient idle_timer 512"
+	log_test $? 0 "Replace idle timer"
+	check_nexthop "id 101" \
+		"id 101 group 1 type resilient buckets 8 idle_timer 512 unbalanced_timer 80 unbalanced_time 0"
+	log_test $? 0 "Get a nexthop group after replacing idle timer"
+
+	run_cmd "$IP nexthop replace id 101 group 1 type resilient unbalanced_timer 256"
+	log_test $? 0 "Replace unbalanced timer"
+	check_nexthop "id 101" \
+		"id 101 group 1 type resilient buckets 8 idle_timer 512 unbalanced_timer 256 unbalanced_time 0"
+	log_test $? 0 "Get a nexthop group after replacing unbalanced timer"
+
+	run_cmd "$IP nexthop replace id 101 group 1 type resilient"
+	log_test $? 0 "Replace with no parameters"
+	check_nexthop "id 101" \
+		"id 101 group 1 type resilient buckets 8 idle_timer 512 unbalanced_timer 256 unbalanced_time 0"
+	log_test $? 0 "Get a nexthop group after replacing no parameters"
+
+	run_cmd "$IP nexthop replace id 101 group 1"
+	log_test $? 2 "Replace nexthop group type - implicit"
+
+	run_cmd "$IP nexthop replace id 101 group 1 type mpath"
+	log_test $? 2 "Replace nexthop group type - explicit"
+
+	run_cmd "$IP nexthop replace id 101 group 1 type resilient buckets 1024"
+	log_test $? 2 "Replace number of nexthop buckets"
+
+	check_nexthop "id 101" \
+		"id 101 group 1 type resilient buckets 8 idle_timer 512 unbalanced_timer 256 unbalanced_time 0"
+	log_test $? 0 "Get a nexthop group after replacing with invalid parameters"
+
+	#
+	# resilient nexthop buckets dump
+	#
+
+	$IP nexthop flush >/dev/null 2>&1
+	run_cmd "$IP nexthop add id 1 dev veth1"
+	run_cmd "$IP nexthop add id 2 dev veth3"
+	run_cmd "$IP nexthop add id 101 group 1/2 type resilient buckets 4"
+	run_cmd "$IP nexthop add id 201 group 1/2"
+
+	check_nexthop_bucket "" \
+		"id 101 index 0 nhid 2 id 101 index 1 nhid 2 id 101 index 2 nhid 1 id 101 index 3 nhid 1"
+	log_test $? 0 "Dump all nexthop buckets"
+
+	check_nexthop_bucket "list id 101" \
+		"id 101 index 0 nhid 2 id 101 index 1 nhid 2 id 101 index 2 nhid 1 id 101 index 3 nhid 1"
+	log_test $? 0 "Dump all nexthop buckets in a group"
+
+	sleep 0.1
+	(( $($IP -j nexthop bucket list id 101 |
+	     jq '[.[] | select(.bucket.idle_time > 0 and
+	                       .bucket.idle_time < 2)] | length') == 4 ))
+	log_test $? 0 "All nexthop buckets report a positive near-zero idle time"
+
+	check_nexthop_bucket "list dev veth1" \
+		"id 101 index 2 nhid 1 id 101 index 3 nhid 1"
+	log_test $? 0 "Dump all nexthop buckets with a specific nexthop device"
+
+	check_nexthop_bucket "list nhid 2" \
+		"id 101 index 0 nhid 2 id 101 index 1 nhid 2"
+	log_test $? 0 "Dump all nexthop buckets with a specific nexthop identifier"
+
+	run_cmd "$IP nexthop bucket list id 111"
+	log_test $? 2 "Dump all nexthop buckets in a non-existent group"
+
+	run_cmd "$IP nexthop bucket list id 201"
+	log_test $? 2 "Dump all nexthop buckets in a non-resilient group"
+
+	run_cmd "$IP nexthop bucket list dev bla"
+	log_test $? 255 "Dump all nexthop buckets using a non-existent device"
+
+	run_cmd "$IP nexthop bucket list groups"
+	log_test $? 255 "Dump all nexthop buckets with invalid 'groups' keyword"
+
+	run_cmd "$IP nexthop bucket list fdb"
+	log_test $? 255 "Dump all nexthop buckets with invalid 'fdb' keyword"
+
+	# Dump should not loop endlessly when maximum nexthop ID is configured.
+	run_cmd "$IP nexthop add id $((2**32-1)) group 1/2 type resilient buckets 4"
+	run_cmd "timeout 5 $IP nexthop bucket"
+	log_test $? 0 "Maximum nexthop ID dump"
+
+	#
+	# resilient nexthop buckets get requests
+	#
+
+	check_nexthop_bucket "get id 101 index 0" "id 101 index 0 nhid 2"
+	log_test $? 0 "Get a valid nexthop bucket"
+
+	run_cmd "$IP nexthop bucket get id 101 index 999"
+	log_test $? 2 "Get a nexthop bucket with valid group, but invalid index"
+
+	run_cmd "$IP nexthop bucket get id 201 index 0"
+	log_test $? 2 "Get a nexthop bucket from a non-resilient group"
+
+	run_cmd "$IP nexthop bucket get id 999 index 0"
+	log_test $? 2 "Get a nexthop bucket from a non-existent group"
+
+	#
+	# tests for bucket migration
+	#
+
+	$IP nexthop flush >/dev/null 2>&1
+
+	run_cmd "$IP nexthop add id 1 dev veth1"
+	run_cmd "$IP nexthop add id 2 dev veth3"
+	run_cmd "$IP nexthop add id 101
+			group 1/2 type resilient buckets 10
+			idle_timer 1 unbalanced_timer 20"
+
+	check_nexthop_buckets_balance "list id 101" \
+				      "nhid 1" "== 5" \
+				      "nhid 2" "== 5"
+	log_test $? 0 "Initial bucket allocation"
+
+	run_cmd "$IP nexthop replace id 101
+			group 1,2/2,3 type resilient"
+	check_nexthop_buckets_balance "list id 101" \
+				      "nhid 1" "== 4" \
+				      "nhid 2" "== 6"
+	log_test $? 0 "Bucket allocation after replace"
+
+	# Check that increase in idle timer does not make buckets appear busy.
+	run_cmd "$IP nexthop replace id 101
+			group 1,2/2,3 type resilient
+			idle_timer 10"
+	run_cmd "$IP nexthop replace id 101
+			group 1/2 type resilient"
+	check_nexthop_buckets_balance "list id 101" \
+				      "nhid 1" "== 5" \
+				      "nhid 2" "== 5"
+	log_test $? 0 "Buckets migrated after idle timer change"
+
+	$IP nexthop flush >/dev/null 2>&1
 }
 
 ################################################################################
@@ -1599,6 +2411,7 @@ usage: ${0##*/} OPTS
         -p          Pause on fail
         -P          Pause after each test before cleanup
         -v          verbose mode (show commands and output)
+	-w	    Timeout for ping
 
     Runtime test
 	-n num	    Number of nexthops to target
@@ -1611,7 +2424,7 @@ EOF
 ################################################################################
 # main
 
-while getopts :t:pP46hv o
+while getopts :t:pP46hvw: o
 do
 	case $o in
 		t) TESTS=$OPTARG;;
@@ -1620,6 +2433,7 @@ do
 		p) PAUSE_ON_FAIL=yes;;
 		P) PAUSE=yes;;
 		v) VERBOSE=$(($VERBOSE + 1));;
+		w) PING_TIMEOUT=$OPTARG;;
 		h) usage; exit 0;;
 		*) usage; exit 1;;
 	esac
@@ -1653,7 +2467,7 @@ fi
 for t in $TESTS
 do
 	case $t in
-	none) IP="ip -netns peer"; setup; exit 0;;
+	none) IP="ip -netns $peer"; setup; exit 0;;
 	*) setup; $t; cleanup;;
 	esac
 done
diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
index a93e6b690e06..51157a5559b7 100755
--- a/tools/testing/selftests/net/fib_rule_tests.sh
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -3,12 +3,12 @@
 
 # This test is for checking IPv4 and IPv6 FIB rules API
 
+source lib.sh
 ret=0
-
 PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
-IP="ip -netns testns"
 
 RTABLE=100
+RTABLE_PEER=101
 GW_IP4=192.51.100.2
 SRC_IP=192.51.100.3
 GW_IP6=2001:db8:1::2
@@ -17,6 +17,9 @@ SRC_IP6=2001:db8:1::3
 DEV_ADDR=192.51.100.1
 DEV_ADDR6=2001:db8:1::1
 DEV=dummy0
+TESTS="fib_rule6 fib_rule4 fib_rule6_connect fib_rule4_connect"
+
+SELFTEST_PATH=""
 
 log_test()
 {
@@ -48,11 +51,36 @@ log_section()
 	echo "######################################################################"
 }
 
+check_nettest()
+{
+	if which nettest > /dev/null 2>&1; then
+		return 0
+	fi
+
+	# Add the selftest directory to PATH if not already done
+	if [ "${SELFTEST_PATH}" = "" ]; then
+		SELFTEST_PATH="$(dirname $0)"
+		PATH="${PATH}:${SELFTEST_PATH}"
+
+		# Now retry with the new path
+		if which nettest > /dev/null 2>&1; then
+			return 0
+		fi
+
+		if [ "${ret}" -eq 0 ]; then
+			ret="${ksft_skip}"
+		fi
+		echo "nettest not found (try 'make -C ${SELFTEST_PATH} nettest')"
+	fi
+
+	return 1
+}
+
 setup()
 {
 	set -e
-	ip netns add testns
-	$IP link set dev lo up
+	setup_ns testns
+	IP="ip -netns $testns"
 
 	$IP link add dummy0 type dummy
 	$IP link set dev dummy0 up
@@ -65,7 +93,41 @@ setup()
 cleanup()
 {
 	$IP link del dev dummy0 &> /dev/null
-	ip netns del testns
+	cleanup_ns $testns
+}
+
+setup_peer()
+{
+	set -e
+
+	setup_ns peerns
+	IP_PEER="ip -netns $peerns"
+	$IP_PEER link set dev lo up
+
+	ip link add name veth0 netns $testns type veth \
+		peer name veth1 netns $peerns
+	$IP link set dev veth0 up
+	$IP_PEER link set dev veth1 up
+
+	$IP address add 192.0.2.10 peer 192.0.2.11/32 dev veth0
+	$IP_PEER address add 192.0.2.11 peer 192.0.2.10/32 dev veth1
+
+	$IP address add 2001:db8::10 peer 2001:db8::11/128 dev veth0 nodad
+	$IP_PEER address add 2001:db8::11 peer 2001:db8::10/128 dev veth1 nodad
+
+	$IP_PEER address add 198.51.100.11/32 dev lo
+	$IP route add table $RTABLE_PEER 198.51.100.11/32 via 192.0.2.11
+
+	$IP_PEER address add 2001:db8::1:11/128 dev lo
+	$IP route add table $RTABLE_PEER 2001:db8::1:11/128 via 2001:db8::11
+
+	set +e
+}
+
+cleanup_peer()
+{
+	$IP link del dev veth0
+	ip netns del $peerns
 }
 
 fib_check_iproute_support()
@@ -93,7 +155,7 @@ fib_rule6_del()
 
 fib_rule6_del_by_pref()
 {
-	pref=$($IP -6 rule show | grep "$1 lookup $TABLE" | cut -d ":" -f 1)
+	pref=$($IP -6 rule show $1 table $RTABLE | cut -d ":" -f 1)
 	$IP -6 rule del pref $pref
 }
 
@@ -101,17 +163,36 @@ fib_rule6_test_match_n_redirect()
 {
 	local match="$1"
 	local getmatch="$2"
+	local description="$3"
 
 	$IP -6 rule add $match table $RTABLE
 	$IP -6 route get $GW_IP6 $getmatch | grep -q "table $RTABLE"
-	log_test $? 0 "rule6 check: $1"
+	log_test $? 0 "rule6 check: $description"
 
 	fib_rule6_del_by_pref "$match"
-	log_test $? 0 "rule6 del by pref: $match"
+	log_test $? 0 "rule6 del by pref: $description"
+}
+
+fib_rule6_test_reject()
+{
+	local match="$1"
+	local rc
+
+	$IP -6 rule add $match table $RTABLE 2>/dev/null
+	rc=$?
+	log_test $rc 2 "rule6 check: $match"
+
+	if [ $rc -eq 0 ]; then
+		$IP -6 rule del $match table $RTABLE
+	fi
 }
 
 fib_rule6_test()
 {
+	local getmatch
+	local match
+	local cnt
+
 	# setup the fib rule redirect route
 	$IP -6 route add table $RTABLE default via $GW_IP6 dev $DEV onlink
 
@@ -121,8 +202,21 @@ fib_rule6_test()
 	match="from $SRC_IP6 iif $DEV"
 	fib_rule6_test_match_n_redirect "$match" "$match" "iif redirect to table"
 
+	# Reject dsfield (tos) options which have ECN bits set
+	for cnt in $(seq 1 3); do
+		match="dsfield $cnt"
+		fib_rule6_test_reject "$match"
+	done
+
+	# Don't take ECN bits into account when matching on dsfield
 	match="tos 0x10"
-	fib_rule6_test_match_n_redirect "$match" "$match" "tos redirect to table"
+	for cnt in "0x10" "0x11" "0x12" "0x13"; do
+		# Using option 'tos' instead of 'dsfield' as old iproute2
+		# versions don't support 'dsfield' in ip rule show.
+		getmatch="tos $cnt"
+		fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+						"$getmatch redirect to table"
+	done
 
 	match="fwmark 0x64"
 	getmatch="mark 0x64"
@@ -154,6 +248,37 @@ fib_rule6_test()
 	fi
 }
 
+# Verify that the IPV6_TCLASS option of UDPv6 and TCPv6 sockets is properly
+# taken into account when connecting the socket and when sending packets.
+fib_rule6_connect_test()
+{
+	local dsfield
+
+	if ! check_nettest; then
+		echo "SKIP: Could not run test without nettest tool"
+		return
+	fi
+
+	setup_peer
+	$IP -6 rule add dsfield 0x04 table $RTABLE_PEER
+
+	# Combine the base DS Field value (0x04) with all possible ECN values
+	# (Not-ECT: 0, ECT(1): 1, ECT(0): 2, CE: 3).
+	# The ECN bits shouldn't influence the result of the test.
+	for dsfield in 0x04 0x05 0x06 0x07; do
+		nettest -q -6 -B -t 5 -N $testns -O $peerns -U -D \
+			-Q "${dsfield}" -l 2001:db8::1:11 -r 2001:db8::1:11
+		log_test $? 0 "rule6 dsfield udp connect (dsfield ${dsfield})"
+
+		nettest -q -6 -B -t 5 -N $testns -O $peerns -Q "${dsfield}" \
+			-l 2001:db8::1:11 -r 2001:db8::1:11
+		log_test $? 0 "rule6 dsfield tcp connect (dsfield ${dsfield})"
+	done
+
+	$IP -6 rule del dsfield 0x04 table $RTABLE_PEER
+	cleanup_peer
+}
+
 fib_rule4_del()
 {
 	$IP rule del $1
@@ -162,7 +287,7 @@ fib_rule4_del()
 
 fib_rule4_del_by_pref()
 {
-	pref=$($IP rule show | grep "$1 lookup $TABLE" | cut -d ":" -f 1)
+	pref=$($IP rule show $1 table $RTABLE | cut -d ":" -f 1)
 	$IP rule del pref $pref
 }
 
@@ -170,17 +295,36 @@ fib_rule4_test_match_n_redirect()
 {
 	local match="$1"
 	local getmatch="$2"
+	local description="$3"
 
 	$IP rule add $match table $RTABLE
 	$IP route get $GW_IP4 $getmatch | grep -q "table $RTABLE"
-	log_test $? 0 "rule4 check: $1"
+	log_test $? 0 "rule4 check: $description"
 
 	fib_rule4_del_by_pref "$match"
-	log_test $? 0 "rule4 del by pref: $match"
+	log_test $? 0 "rule4 del by pref: $description"
+}
+
+fib_rule4_test_reject()
+{
+	local match="$1"
+	local rc
+
+	$IP rule add $match table $RTABLE 2>/dev/null
+	rc=$?
+	log_test $rc 2 "rule4 check: $match"
+
+	if [ $rc -eq 0 ]; then
+		$IP rule del $match table $RTABLE
+	fi
 }
 
 fib_rule4_test()
 {
+	local getmatch
+	local match
+	local cnt
+
 	# setup the fib rule redirect route
 	$IP route add table $RTABLE default via $GW_IP4 dev $DEV onlink
 
@@ -189,14 +333,27 @@ fib_rule4_test()
 
 	# need enable forwarding and disable rp_filter temporarily as all the
 	# addresses are in the same subnet and egress device == ingress device.
-	ip netns exec testns sysctl -w net.ipv4.ip_forward=1
-	ip netns exec testns sysctl -w net.ipv4.conf.$DEV.rp_filter=0
+	ip netns exec $testns sysctl -qw net.ipv4.ip_forward=1
+	ip netns exec $testns sysctl -qw net.ipv4.conf.$DEV.rp_filter=0
 	match="from $SRC_IP iif $DEV"
 	fib_rule4_test_match_n_redirect "$match" "$match" "iif redirect to table"
-	ip netns exec testns sysctl -w net.ipv4.ip_forward=0
+	ip netns exec $testns sysctl -qw net.ipv4.ip_forward=0
+
+	# Reject dsfield (tos) options which have ECN bits set
+	for cnt in $(seq 1 3); do
+		match="dsfield $cnt"
+		fib_rule4_test_reject "$match"
+	done
 
+	# Don't take ECN bits into account when matching on dsfield
 	match="tos 0x10"
-	fib_rule4_test_match_n_redirect "$match" "$match" "tos redirect to table"
+	for cnt in "0x10" "0x11" "0x12" "0x13"; do
+		# Using option 'tos' instead of 'dsfield' as old iproute2
+		# versions don't support 'dsfield' in ip rule show.
+		getmatch="tos $cnt"
+		fib_rule4_test_match_n_redirect "$match" "$getmatch" \
+						"$getmatch redirect to table"
+	done
 
 	match="fwmark 0x64"
 	getmatch="mark 0x64"
@@ -228,6 +385,37 @@ fib_rule4_test()
 	fi
 }
 
+# Verify that the IP_TOS option of UDPv4 and TCPv4 sockets is properly taken
+# into account when connecting the socket and when sending packets.
+fib_rule4_connect_test()
+{
+	local dsfield
+
+	if ! check_nettest; then
+		echo "SKIP: Could not run test without nettest tool"
+		return
+	fi
+
+	setup_peer
+	$IP -4 rule add dsfield 0x04 table $RTABLE_PEER
+
+	# Combine the base DS Field value (0x04) with all possible ECN values
+	# (Not-ECT: 0, ECT(1): 1, ECT(0): 2, CE: 3).
+	# The ECN bits shouldn't influence the result of the test.
+	for dsfield in 0x04 0x05 0x06 0x07; do
+		nettest -q -B -t 5 -N $testns -O $peerns -D -U -Q "${dsfield}" \
+			-l 198.51.100.11 -r 198.51.100.11
+		log_test $? 0 "rule4 dsfield udp connect (dsfield ${dsfield})"
+
+		nettest -q -B -t 5 -N $testns -O $peerns -Q "${dsfield}" \
+			-l 198.51.100.11 -r 198.51.100.11
+		log_test $? 0 "rule4 dsfield tcp connect (dsfield ${dsfield})"
+	done
+
+	$IP -4 rule del dsfield 0x04 table $RTABLE_PEER
+	cleanup_peer
+}
+
 run_fibrule_tests()
 {
 	log_section "IPv4 fib rule"
@@ -235,21 +423,55 @@ run_fibrule_tests()
 	log_section "IPv6 fib rule"
 	fib_rule6_test
 }
+################################################################################
+# usage
+
+usage()
+{
+	cat <<EOF
+usage: ${0##*/} OPTS
+
+        -t <test>   Test(s) to run (default: all)
+                    (options: $TESTS)
+EOF
+}
+
+################################################################################
+# main
+
+while getopts ":t:h" opt; do
+	case $opt in
+		t) TESTS=$OPTARG;;
+		h) usage; exit 0;;
+		*) usage; exit 1;;
+	esac
+done
 
 if [ "$(id -u)" -ne 0 ];then
 	echo "SKIP: Need root privileges"
-	exit 0
+	exit $ksft_skip
 fi
 
 if [ ! -x "$(command -v ip)" ]; then
 	echo "SKIP: Could not run test without ip tool"
-	exit 0
+	exit $ksft_skip
 fi
 
 # start clean
 cleanup &> /dev/null
 setup
-run_fibrule_tests
+for t in $TESTS
+do
+	case $t in
+	fib_rule6_test|fib_rule6)		fib_rule6_test;;
+	fib_rule4_test|fib_rule4)		fib_rule4_test;;
+	fib_rule6_connect_test|fib_rule6_connect)	fib_rule6_connect_test;;
+	fib_rule4_connect_test|fib_rule4_connect)	fib_rule4_connect_test;;
+
+	help) echo "Test names: $TESTS"; exit 0;;
+
+	esac
+done
 cleanup
 
 if [ "$TESTS" != "none" ]; then
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 84205c3a55eb..73895711cdf4 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -3,19 +3,19 @@
 
 # This test is for checking IPv4 and IPv6 FIB behavior in response to
 # different events.
-
+source lib.sh
 ret=0
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
 
 # all tests in this script. Can be overridden with -t option
-TESTS="unregister down carrier nexthop suppress ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr"
+TESTS="unregister down carrier nexthop suppress ipv6_notify ipv4_notify \
+       ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics \
+       ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr \
+       ipv6_del_addr ipv4_mangle ipv6_mangle ipv4_bcast_neigh fib6_gc_test \
+       ipv4_mpath_list ipv6_mpath_list"
 
 VERBOSE=0
 PAUSE_ON_FAIL=no
 PAUSE=no
-IP="ip -netns ns1"
-NS_EXEC="ip netns exec ns1"
 
 which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
 
@@ -51,11 +51,11 @@ log_test()
 setup()
 {
 	set -e
-	ip netns add ns1
-	ip netns set ns1 auto
-	$IP link set dev lo up
-	ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1
-	ip netns exec ns1 sysctl -qw net.ipv6.conf.all.forwarding=1
+	setup_ns ns1
+	IP="$(which ip) -netns $ns1"
+	NS_EXEC="$(which ip) netns exec $ns1"
+	ip netns exec $ns1 sysctl -qw net.ipv4.ip_forward=1
+	ip netns exec $ns1 sysctl -qw net.ipv6.conf.all.forwarding=1
 
 	$IP link add dummy0 type dummy
 	$IP link set dev dummy0 up
@@ -68,8 +68,7 @@ setup()
 cleanup()
 {
 	$IP link del dev dummy0 &> /dev/null
-	ip netns del ns1
-	ip netns del ns2 &> /dev/null
+	cleanup_ns $ns1 $ns2
 }
 
 get_linklocal()
@@ -444,24 +443,60 @@ fib_rp_filter_test()
 	setup
 
 	set -e
+	setup_ns ns2
+
+	$IP link add name veth1 type veth peer name veth2
+	$IP link set dev veth2 netns $ns2
+	$IP address add 192.0.2.1/24 dev veth1
+	ip -netns $ns2 address add 192.0.2.1/24 dev veth2
+	$IP link set dev veth1 up
+	ip -netns $ns2 link set dev veth2 up
+
 	$IP link set dev lo address 52:54:00:6a:c7:5e
-	$IP link set dummy0 address 52:54:00:6a:c7:5e
-	$IP link add dummy1 type dummy
-	$IP link set dummy1 address 52:54:00:6a:c7:5e
-	$IP link set dev dummy1 up
+	$IP link set dev veth1 address 52:54:00:6a:c7:5e
+	ip -netns $ns2 link set dev lo address 52:54:00:6a:c7:5e
+	ip -netns $ns2 link set dev veth2 address 52:54:00:6a:c7:5e
+
+	# 1. (ns2) redirect lo's egress to veth2's egress
+	ip netns exec $ns2 tc qdisc add dev lo parent root handle 1: fq_codel
+	ip netns exec $ns2 tc filter add dev lo parent 1: protocol arp basic \
+		action mirred egress redirect dev veth2
+	ip netns exec $ns2 tc filter add dev lo parent 1: protocol ip basic \
+		action mirred egress redirect dev veth2
+
+	# 2. (ns1) redirect veth1's ingress to lo's ingress
+	$NS_EXEC tc qdisc add dev veth1 ingress
+	$NS_EXEC tc filter add dev veth1 ingress protocol arp basic \
+		action mirred ingress redirect dev lo
+	$NS_EXEC tc filter add dev veth1 ingress protocol ip basic \
+		action mirred ingress redirect dev lo
+
+	# 3. (ns1) redirect lo's egress to veth1's egress
+	$NS_EXEC tc qdisc add dev lo parent root handle 1: fq_codel
+	$NS_EXEC tc filter add dev lo parent 1: protocol arp basic \
+		action mirred egress redirect dev veth1
+	$NS_EXEC tc filter add dev lo parent 1: protocol ip basic \
+		action mirred egress redirect dev veth1
+
+	# 4. (ns2) redirect veth2's ingress to lo's ingress
+	ip netns exec $ns2 tc qdisc add dev veth2 ingress
+	ip netns exec $ns2 tc filter add dev veth2 ingress protocol arp basic \
+		action mirred ingress redirect dev lo
+	ip netns exec $ns2 tc filter add dev veth2 ingress protocol ip basic \
+		action mirred ingress redirect dev lo
+
 	$NS_EXEC sysctl -qw net.ipv4.conf.all.rp_filter=1
 	$NS_EXEC sysctl -qw net.ipv4.conf.all.accept_local=1
 	$NS_EXEC sysctl -qw net.ipv4.conf.all.route_localnet=1
-
-	$NS_EXEC tc qd add dev dummy1 parent root handle 1: fq_codel
-	$NS_EXEC tc filter add dev dummy1 parent 1: protocol arp basic action mirred egress redirect dev lo
-	$NS_EXEC tc filter add dev dummy1 parent 1: protocol ip basic action mirred egress redirect dev lo
+	ip netns exec $ns2 sysctl -qw net.ipv4.conf.all.rp_filter=1
+	ip netns exec $ns2 sysctl -qw net.ipv4.conf.all.accept_local=1
+	ip netns exec $ns2 sysctl -qw net.ipv4.conf.all.route_localnet=1
 	set +e
 
-	run_cmd "ip netns exec ns1 ping -I dummy1 -w1 -c1 198.51.100.1"
+	run_cmd "ip netns exec $ns2 ping -w1 -c1 192.0.2.1"
 	log_test $? 0 "rp_filter passes local packets"
 
-	run_cmd "ip netns exec ns1 ping -I dummy1 -w1 -c1 127.0.0.1"
+	run_cmd "ip netns exec $ns2 ping -w1 -c1 127.0.0.1"
 	log_test $? 0 "rp_filter passes loopback packets"
 
 	cleanup
@@ -616,6 +651,264 @@ fib_nexthop_test()
 	cleanup
 }
 
+fib6_notify_test()
+{
+	setup
+
+	echo
+	echo "Fib6 info length calculation in route notify test"
+	set -e
+
+	for i in 10 20 30 40 50 60 70;
+	do
+		$IP link add dummy_$i type dummy
+		$IP link set dev dummy_$i up
+		$IP -6 address add 2001:$i::1/64 dev dummy_$i
+	done
+
+	$NS_EXEC ip monitor route &> errors.txt &
+	sleep 2
+
+	$IP -6 route add 2001::/64 \
+                nexthop via 2001:10::2 dev dummy_10 \
+                nexthop encap ip6 dst 2002::20 via 2001:20::2 dev dummy_20 \
+                nexthop encap ip6 dst 2002::30 via 2001:30::2 dev dummy_30 \
+                nexthop encap ip6 dst 2002::40 via 2001:40::2 dev dummy_40 \
+                nexthop encap ip6 dst 2002::50 via 2001:50::2 dev dummy_50 \
+                nexthop encap ip6 dst 2002::60 via 2001:60::2 dev dummy_60 \
+                nexthop encap ip6 dst 2002::70 via 2001:70::2 dev dummy_70
+
+	set +e
+
+	err=`cat errors.txt |grep "Message too long"`
+	if [ -z "$err" ];then
+		ret=0
+	else
+		ret=1
+	fi
+
+	log_test $ret 0 "ipv6 route add notify"
+
+	{ kill %% && wait %%; } 2>/dev/null
+
+	#rm errors.txt
+
+	cleanup &> /dev/null
+}
+
+
+fib_notify_test()
+{
+	setup
+
+	echo
+	echo "Fib4 info length calculation in route notify test"
+
+	set -e
+
+	for i in 10 20 30 40 50 60 70;
+	do
+		$IP link add dummy_$i type dummy
+		$IP link set dev dummy_$i up
+		$IP address add 20.20.$i.2/24 dev dummy_$i
+	done
+
+	$NS_EXEC ip monitor route &> errors.txt &
+	sleep 2
+
+        $IP route add 10.0.0.0/24 \
+                nexthop via 20.20.10.1 dev dummy_10 \
+                nexthop encap ip dst 192.168.10.20 via 20.20.20.1 dev dummy_20 \
+                nexthop encap ip dst 192.168.10.30 via 20.20.30.1 dev dummy_30 \
+                nexthop encap ip dst 192.168.10.40 via 20.20.40.1 dev dummy_40 \
+                nexthop encap ip dst 192.168.10.50 via 20.20.50.1 dev dummy_50 \
+                nexthop encap ip dst 192.168.10.60 via 20.20.60.1 dev dummy_60 \
+                nexthop encap ip dst 192.168.10.70 via 20.20.70.1 dev dummy_70
+
+	set +e
+
+	err=`cat errors.txt |grep "Message too long"`
+	if [ -z "$err" ];then
+		ret=0
+	else
+		ret=1
+	fi
+
+	log_test $ret 0 "ipv4 route add notify"
+
+	{ kill %% && wait %%; } 2>/dev/null
+
+	rm  errors.txt
+
+	cleanup &> /dev/null
+}
+
+# Create a new dummy_10 to remove all associated routes.
+reset_dummy_10()
+{
+	$IP link del dev dummy_10
+
+	$IP link add dummy_10 type dummy
+	$IP link set dev dummy_10 up
+	$IP -6 address add 2001:10::1/64 dev dummy_10
+}
+
+check_rt_num()
+{
+    local expected=$1
+    local num=$2
+
+    if [ $num -ne $expected ]; then
+	echo "FAIL: Expected $expected routes, got $num"
+	ret=1
+    else
+	ret=0
+    fi
+}
+
+check_rt_num_clean()
+{
+    local expected=$1
+    local num=$2
+
+    if [ $num -ne $expected ]; then
+	log_test 1 0 "expected $expected routes, got $num"
+	set +e
+	cleanup &> /dev/null
+	return 1
+    fi
+    return 0
+}
+
+fib6_gc_test()
+{
+	setup
+
+	echo
+	echo "Fib6 garbage collection test"
+	set -e
+
+	EXPIRE=5
+	GC_WAIT_TIME=$((EXPIRE * 2 + 2))
+
+	# Check expiration of routes every $EXPIRE seconds (GC)
+	$NS_EXEC sysctl -wq net.ipv6.route.gc_interval=$EXPIRE
+
+	$IP link add dummy_10 type dummy
+	$IP link set dev dummy_10 up
+	$IP -6 address add 2001:10::1/64 dev dummy_10
+
+	$NS_EXEC sysctl -wq net.ipv6.route.flush=1
+
+	# Temporary routes
+	for i in $(seq 1 5); do
+	    # Expire route after $EXPIRE seconds
+	    $IP -6 route add 2001:20::$i \
+		via 2001:10::2 dev dummy_10 expires $EXPIRE
+	done
+	sleep $GC_WAIT_TIME
+	$NS_EXEC sysctl -wq net.ipv6.route.flush=1
+	check_rt_num 0 $($IP -6 route list |grep expires|wc -l)
+	log_test $ret 0 "ipv6 route garbage collection"
+
+	reset_dummy_10
+
+	# Permanent routes
+	for i in $(seq 1 5); do
+	    $IP -6 route add 2001:30::$i \
+		via 2001:10::2 dev dummy_10
+	done
+	# Temporary routes
+	for i in $(seq 1 5); do
+	    # Expire route after $EXPIRE seconds
+	    $IP -6 route add 2001:20::$i \
+		via 2001:10::2 dev dummy_10 expires $EXPIRE
+	done
+	# Wait for GC
+	sleep $GC_WAIT_TIME
+	check_rt_num 0 $($IP -6 route list |grep expires|wc -l)
+	log_test $ret 0 "ipv6 route garbage collection (with permanent routes)"
+
+	reset_dummy_10
+
+	# Permanent routes
+	for i in $(seq 1 5); do
+	    $IP -6 route add 2001:20::$i \
+		via 2001:10::2 dev dummy_10
+	done
+	# Replace with temporary routes
+	for i in $(seq 1 5); do
+	    # Expire route after $EXPIRE seconds
+	    $IP -6 route replace 2001:20::$i \
+		via 2001:10::2 dev dummy_10 expires $EXPIRE
+	done
+	# Wait for GC
+	sleep $GC_WAIT_TIME
+	check_rt_num 0 $($IP -6 route list |grep expires|wc -l)
+	log_test $ret 0 "ipv6 route garbage collection (replace with expires)"
+
+	reset_dummy_10
+
+	# Temporary routes
+	for i in $(seq 1 5); do
+	    # Expire route after $EXPIRE seconds
+	    $IP -6 route add 2001:20::$i \
+		via 2001:10::2 dev dummy_10 expires $EXPIRE
+	done
+	# Replace with permanent routes
+	for i in $(seq 1 5); do
+	    $IP -6 route replace 2001:20::$i \
+		via 2001:10::2 dev dummy_10
+	done
+	check_rt_num_clean 0 $($IP -6 route list |grep expires|wc -l) || return
+
+	# Wait for GC
+	sleep $GC_WAIT_TIME
+	check_rt_num 5 $($IP -6 route list |grep -v expires|grep 2001:20::|wc -l)
+	log_test $ret 0 "ipv6 route garbage collection (replace with permanent)"
+
+	# ra6 is required for the next test. (ipv6toolkit)
+	if [ ! -x "$(command -v ra6)" ]; then
+	    echo "SKIP: ra6 not found."
+	    set +e
+	    cleanup &> /dev/null
+	    return
+	fi
+
+	# Delete dummy_10 and remove all routes
+	$IP link del dev dummy_10
+
+	# Create a pair of veth devices to send a RA message from one
+	# device to another.
+	$IP link add veth1 type veth peer name veth2
+	$IP link set dev veth1 up
+	$IP link set dev veth2 up
+	$IP -6 address add 2001:10::1/64 dev veth1 nodad
+	$IP -6 address add 2001:10::2/64 dev veth2 nodad
+
+	# Make veth1 ready to receive RA messages.
+	$NS_EXEC sysctl -wq net.ipv6.conf.veth1.accept_ra=2
+
+	# Send a RA message with a route from veth2 to veth1.
+	$NS_EXEC ra6 -i veth2 -d 2001:10::1 -t $EXPIRE
+
+	# Wait for the RA message.
+	sleep 1
+
+	# systemd may mess up the test.  You syould make sure that
+	# systemd-networkd.service and systemd-networkd.socket are stopped.
+	check_rt_num_clean 1 $($IP -6 route list|grep expires|wc -l) || return
+
+	# Wait for GC
+	sleep $GC_WAIT_TIME
+	check_rt_num 0 $($IP -6 route list |grep expires|wc -l)
+	log_test $ret 0 "ipv6 route garbage collection (RA message)"
+
+	set +e
+
+	cleanup &> /dev/null
+}
+
 fib_suppress_test()
 {
 	echo
@@ -762,34 +1055,32 @@ route_setup()
 	[ "${VERBOSE}" = "1" ] && set -x
 	set -e
 
-	ip netns add ns2
-	ip netns set ns2 auto
-	ip -netns ns2 link set dev lo up
-	ip netns exec ns2 sysctl -qw net.ipv4.ip_forward=1
-	ip netns exec ns2 sysctl -qw net.ipv6.conf.all.forwarding=1
+	setup_ns ns2
+	ip netns exec $ns2 sysctl -qw net.ipv4.ip_forward=1
+	ip netns exec $ns2 sysctl -qw net.ipv6.conf.all.forwarding=1
 
 	$IP li add veth1 type veth peer name veth2
 	$IP li add veth3 type veth peer name veth4
 
 	$IP li set veth1 up
 	$IP li set veth3 up
-	$IP li set veth2 netns ns2 up
-	$IP li set veth4 netns ns2 up
-	ip -netns ns2 li add dummy1 type dummy
-	ip -netns ns2 li set dummy1 up
+	$IP li set veth2 netns $ns2 up
+	$IP li set veth4 netns $ns2 up
+	ip -netns $ns2 li add dummy1 type dummy
+	ip -netns $ns2 li set dummy1 up
 
 	$IP -6 addr add 2001:db8:101::1/64 dev veth1 nodad
 	$IP -6 addr add 2001:db8:103::1/64 dev veth3 nodad
 	$IP addr add 172.16.101.1/24 dev veth1
 	$IP addr add 172.16.103.1/24 dev veth3
 
-	ip -netns ns2 -6 addr add 2001:db8:101::2/64 dev veth2 nodad
-	ip -netns ns2 -6 addr add 2001:db8:103::2/64 dev veth4 nodad
-	ip -netns ns2 -6 addr add 2001:db8:104::1/64 dev dummy1 nodad
+	ip -netns $ns2 -6 addr add 2001:db8:101::2/64 dev veth2 nodad
+	ip -netns $ns2 -6 addr add 2001:db8:103::2/64 dev veth4 nodad
+	ip -netns $ns2 -6 addr add 2001:db8:104::1/64 dev dummy1 nodad
 
-	ip -netns ns2 addr add 172.16.101.2/24 dev veth2
-	ip -netns ns2 addr add 172.16.103.2/24 dev veth4
-	ip -netns ns2 addr add 172.16.104.1/24 dev dummy1
+	ip -netns $ns2 addr add 172.16.101.2/24 dev veth2
+	ip -netns $ns2 addr add 172.16.103.2/24 dev veth4
+	ip -netns $ns2 addr add 172.16.104.1/24 dev dummy1
 
 	set +e
 }
@@ -949,12 +1240,25 @@ ipv6_rt_replace()
 	ipv6_rt_replace_mpath
 }
 
+ipv6_rt_dsfield()
+{
+	echo
+	echo "IPv6 route with dsfield tests"
+
+	run_cmd "$IP -6 route flush 2001:db8:102::/64"
+
+	# IPv6 doesn't support routing based on dsfield
+	run_cmd "$IP -6 route add 2001:db8:102::/64 dsfield 0x04 via 2001:db8:101::2"
+	log_test $? 2 "Reject route with dsfield"
+}
+
 ipv6_route_test()
 {
 	route_setup
 
 	ipv6_rt_add
 	ipv6_rt_replace
+	ipv6_rt_dsfield
 
 	route_cleanup
 }
@@ -1028,7 +1332,7 @@ ipv6_addr_metric_test()
 	log_test $rc 0 "Modify metric of address"
 
 	# verify prefix route removed on down
-	run_cmd "ip netns exec ns1 sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1"
+	run_cmd "ip netns exec $ns1 sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1"
 	run_cmd "$IP li set dev dummy2 down"
 	rc=$?
 	if [ $rc -eq 0 ]; then
@@ -1055,7 +1359,6 @@ ipv6_addr_metric_test()
 
 	check_route6 "2001:db8:104::1 dev dummy2 proto kernel metric 260"
 	log_test $? 0 "Set metric with peer route on local side"
-	log_test $? 0 "User specified metric on local address"
 	check_route6 "2001:db8:104::2 dev dummy2 proto kernel metric 260"
 	log_test $? 0 "Set metric with peer route on peer side"
 
@@ -1135,7 +1438,7 @@ ipv6_route_metrics_test()
 	log_test $rc 0 "Multipath route with mtu metric"
 
 	$IP -6 ro add 2001:db8:104::/64 via 2001:db8:101::2 mtu 1300
-	run_cmd "ip netns exec ns1 ${ping6} -w1 -c1 -s 1500 2001:db8:104::1"
+	run_cmd "ip netns exec $ns1 ${ping6} -w1 -c1 -s 1500 2001:db8:104::1"
 	log_test $? 0 "Using route with mtu metric"
 
 	run_cmd "$IP -6 ro add 2001:db8:114::/64 via  2001:db8:101::2  congctl lock foo"
@@ -1385,12 +1688,113 @@ ipv4_rt_replace()
 	ipv4_rt_replace_mpath
 }
 
+# checks that cached input route on VRF port is deleted
+# when VRF is deleted
+ipv4_local_rt_cache()
+{
+	run_cmd "ip addr add 10.0.0.1/32 dev lo"
+	run_cmd "setup_ns test-ns"
+	run_cmd "ip link add veth-outside type veth peer name veth-inside"
+	run_cmd "ip link add vrf-100 type vrf table 1100"
+	run_cmd "ip link set veth-outside master vrf-100"
+	run_cmd "ip link set veth-inside netns $test-ns"
+	run_cmd "ip link set veth-outside up"
+	run_cmd "ip link set vrf-100 up"
+	run_cmd "ip route add 10.1.1.1/32 dev veth-outside table 1100"
+	run_cmd "ip netns exec $test-ns ip link set veth-inside up"
+	run_cmd "ip netns exec $test-ns ip addr add 10.1.1.1/32 dev veth-inside"
+	run_cmd "ip netns exec $test-ns ip route add 10.0.0.1/32 dev veth-inside"
+	run_cmd "ip netns exec $test-ns ip route add default via 10.0.0.1"
+	run_cmd "ip netns exec $test-ns ping 10.0.0.1 -c 1 -i 1"
+	run_cmd "ip link delete vrf-100"
+
+	# if we do not hang test is a success
+	log_test $? 0 "Cached route removed from VRF port device"
+}
+
+ipv4_rt_dsfield()
+{
+	echo
+	echo "IPv4 route with dsfield tests"
+
+	run_cmd "$IP route flush 172.16.102.0/24"
+
+	# New routes should reject dsfield options that interfere with ECN
+	run_cmd "$IP route add 172.16.102.0/24 dsfield 0x01 via 172.16.101.2"
+	log_test $? 2 "Reject route with dsfield 0x01"
+
+	run_cmd "$IP route add 172.16.102.0/24 dsfield 0x02 via 172.16.101.2"
+	log_test $? 2 "Reject route with dsfield 0x02"
+
+	run_cmd "$IP route add 172.16.102.0/24 dsfield 0x03 via 172.16.101.2"
+	log_test $? 2 "Reject route with dsfield 0x03"
+
+	# A generic route that doesn't take DSCP into account
+	run_cmd "$IP route add 172.16.102.0/24 via 172.16.101.2"
+
+	# A more specific route for DSCP 0x10
+	run_cmd "$IP route add 172.16.102.0/24 dsfield 0x10 via 172.16.103.2"
+
+	# DSCP 0x10 should match the specific route, no matter the ECN bits
+	$IP route get fibmatch 172.16.102.1 dsfield 0x10 | \
+		grep -q "via 172.16.103.2"
+	log_test $? 0 "IPv4 route with DSCP and ECN:Not-ECT"
+
+	$IP route get fibmatch 172.16.102.1 dsfield 0x11 | \
+		grep -q "via 172.16.103.2"
+	log_test $? 0 "IPv4 route with DSCP and ECN:ECT(1)"
+
+	$IP route get fibmatch 172.16.102.1 dsfield 0x12 | \
+		grep -q "via 172.16.103.2"
+	log_test $? 0 "IPv4 route with DSCP and ECN:ECT(0)"
+
+	$IP route get fibmatch 172.16.102.1 dsfield 0x13 | \
+		grep -q "via 172.16.103.2"
+	log_test $? 0 "IPv4 route with DSCP and ECN:CE"
+
+	# Unknown DSCP should match the generic route, no matter the ECN bits
+	$IP route get fibmatch 172.16.102.1 dsfield 0x14 | \
+		grep -q "via 172.16.101.2"
+	log_test $? 0 "IPv4 route with unknown DSCP and ECN:Not-ECT"
+
+	$IP route get fibmatch 172.16.102.1 dsfield 0x15 | \
+		grep -q "via 172.16.101.2"
+	log_test $? 0 "IPv4 route with unknown DSCP and ECN:ECT(1)"
+
+	$IP route get fibmatch 172.16.102.1 dsfield 0x16 | \
+		grep -q "via 172.16.101.2"
+	log_test $? 0 "IPv4 route with unknown DSCP and ECN:ECT(0)"
+
+	$IP route get fibmatch 172.16.102.1 dsfield 0x17 | \
+		grep -q "via 172.16.101.2"
+	log_test $? 0 "IPv4 route with unknown DSCP and ECN:CE"
+
+	# Null DSCP should match the generic route, no matter the ECN bits
+	$IP route get fibmatch 172.16.102.1 dsfield 0x00 | \
+		grep -q "via 172.16.101.2"
+	log_test $? 0 "IPv4 route with no DSCP and ECN:Not-ECT"
+
+	$IP route get fibmatch 172.16.102.1 dsfield 0x01 | \
+		grep -q "via 172.16.101.2"
+	log_test $? 0 "IPv4 route with no DSCP and ECN:ECT(1)"
+
+	$IP route get fibmatch 172.16.102.1 dsfield 0x02 | \
+		grep -q "via 172.16.101.2"
+	log_test $? 0 "IPv4 route with no DSCP and ECN:ECT(0)"
+
+	$IP route get fibmatch 172.16.102.1 dsfield 0x03 | \
+		grep -q "via 172.16.101.2"
+	log_test $? 0 "IPv4 route with no DSCP and ECN:CE"
+}
+
 ipv4_route_test()
 {
 	route_setup
 
 	ipv4_rt_add
 	ipv4_rt_replace
+	ipv4_local_rt_cache
+	ipv4_rt_dsfield
 
 	route_cleanup
 }
@@ -1531,7 +1935,7 @@ ipv4_route_metrics_test()
 	log_test $rc 0 "Multipath route with mtu metric"
 
 	$IP ro add 172.16.104.0/24 via 172.16.101.2 mtu 1300
-	run_cmd "ip netns exec ns1 ping -w1 -c1 -s 1500 172.16.104.1"
+	run_cmd "ip netns exec $ns1 ping -w1 -c1 -s 1500 172.16.104.1"
 	log_test $? 0 "Using route with mtu metric"
 
 	run_cmd "$IP ro add 172.16.111.0/24 via 172.16.101.2 congctl lock foo"
@@ -1559,13 +1963,21 @@ ipv4_del_addr_test()
 
 	$IP addr add dev dummy1 172.16.104.1/24
 	$IP addr add dev dummy1 172.16.104.11/24
+	$IP addr add dev dummy1 172.16.104.12/24
+	$IP addr add dev dummy1 172.16.104.13/24
 	$IP addr add dev dummy2 172.16.104.1/24
 	$IP addr add dev dummy2 172.16.104.11/24
+	$IP addr add dev dummy2 172.16.104.12/24
 	$IP route add 172.16.105.0/24 via 172.16.104.2 src 172.16.104.11
+	$IP route add 172.16.106.0/24 dev lo src 172.16.104.12
+	$IP route add table 0 172.16.107.0/24 via 172.16.104.2 src 172.16.104.13
 	$IP route add vrf red 172.16.105.0/24 via 172.16.104.2 src 172.16.104.11
+	$IP route add vrf red 172.16.106.0/24 dev lo src 172.16.104.12
 	set +e
 
 	# removing address from device in vrf should only remove route from vrf table
+	echo "    Regular FIB info"
+
 	$IP addr del dev dummy2 172.16.104.11/24
 	$IP ro ls vrf red | grep -q 172.16.105.0/24
 	log_test $? 1 "Route removed from VRF when source address deleted"
@@ -1583,11 +1995,189 @@ ipv4_del_addr_test()
 	$IP ro ls vrf red | grep -q 172.16.105.0/24
 	log_test $? 0 "Route in VRF is not removed by address delete"
 
+	# removing address from device in vrf should only remove route from vrf
+	# table even when the associated fib info only differs in table ID
+	echo "    Identical FIB info with different table ID"
+
+	$IP addr del dev dummy2 172.16.104.12/24
+	$IP ro ls vrf red | grep -q 172.16.106.0/24
+	log_test $? 1 "Route removed from VRF when source address deleted"
+
+	$IP ro ls | grep -q 172.16.106.0/24
+	log_test $? 0 "Route in default VRF not removed"
+
+	$IP addr add dev dummy2 172.16.104.12/24
+	$IP route add vrf red 172.16.106.0/24 dev lo src 172.16.104.12
+
+	$IP addr del dev dummy1 172.16.104.12/24
+	$IP ro ls | grep -q 172.16.106.0/24
+	log_test $? 1 "Route removed in default VRF when source address deleted"
+
+	$IP ro ls vrf red | grep -q 172.16.106.0/24
+	log_test $? 0 "Route in VRF is not removed by address delete"
+
+	# removing address from device in default vrf should remove route from
+	# the default vrf even when route was inserted with a table ID of 0.
+	echo "    Table ID 0"
+
+	$IP addr del dev dummy1 172.16.104.13/24
+	$IP ro ls | grep -q 172.16.107.0/24
+	log_test $? 1 "Route removed in default VRF when source address deleted"
+
 	$IP li del dummy1
 	$IP li del dummy2
 	cleanup
 }
 
+ipv6_del_addr_test()
+{
+	echo
+	echo "IPv6 delete address route tests"
+
+	setup
+
+	set -e
+	for i in $(seq 6); do
+		$IP li add dummy${i} up type dummy
+	done
+
+	$IP li add red up type vrf table 1111
+	$IP ro add vrf red unreachable default
+	for i in $(seq 4 6); do
+		$IP li set dummy${i} vrf red
+	done
+
+	$IP addr add dev dummy1 fe80::1/128
+	$IP addr add dev dummy1 2001:db8:101::1/64
+	$IP addr add dev dummy1 2001:db8:101::10/64
+	$IP addr add dev dummy1 2001:db8:101::11/64
+	$IP addr add dev dummy1 2001:db8:101::12/64
+	$IP addr add dev dummy1 2001:db8:101::13/64
+	$IP addr add dev dummy1 2001:db8:101::14/64
+	$IP addr add dev dummy1 2001:db8:101::15/64
+	$IP addr add dev dummy2 fe80::1/128
+	$IP addr add dev dummy2 2001:db8:101::1/64
+	$IP addr add dev dummy2 2001:db8:101::11/64
+	$IP addr add dev dummy3 fe80::1/128
+
+	$IP addr add dev dummy4 2001:db8:101::1/64
+	$IP addr add dev dummy4 2001:db8:101::10/64
+	$IP addr add dev dummy4 2001:db8:101::11/64
+	$IP addr add dev dummy4 2001:db8:101::12/64
+	$IP addr add dev dummy4 2001:db8:101::13/64
+	$IP addr add dev dummy4 2001:db8:101::14/64
+	$IP addr add dev dummy5 2001:db8:101::1/64
+	$IP addr add dev dummy5 2001:db8:101::11/64
+
+	# Single device using src address
+	$IP route add 2001:db8:110::/64 dev dummy3 src 2001:db8:101::10
+	# Two devices with the same source address
+	$IP route add 2001:db8:111::/64 dev dummy3 src 2001:db8:101::11
+	# VRF with single device using src address
+	$IP route add vrf red 2001:db8:110::/64 dev dummy6 src 2001:db8:101::10
+	# VRF with two devices using src address
+	$IP route add vrf red 2001:db8:111::/64 dev dummy6 src 2001:db8:101::11
+	# src address and nexthop dev in same VRF
+	$IP route add 2001:db8:112::/64 dev dummy3 src 2001:db8:101::12
+	$IP route add vrf red 2001:db8:112::/64 dev dummy6 src 2001:db8:101::12
+	# src address and nexthop device in different VRF
+	$IP route add 2001:db8:113::/64 dev lo src 2001:db8:101::13
+	$IP route add vrf red 2001:db8:113::/64 dev lo src 2001:db8:101::13
+	# table ID 0
+	$IP route add table 0 2001:db8:115::/64 via 2001:db8:101::2 src 2001:db8:101::15
+	# Link local source route
+	$IP route add 2001:db8:116::/64 dev dummy2 src fe80::1
+	$IP route add 2001:db8:117::/64 dev dummy3 src fe80::1
+	set +e
+
+	echo "    Single device using src address"
+
+	$IP addr del dev dummy1 2001:db8:101::10/64
+	$IP -6 route show | grep -q "src 2001:db8:101::10 "
+	log_test $? 1 "Prefsrc removed when src address removed on other device"
+
+	echo "    Two devices with the same source address"
+
+	$IP addr del dev dummy1 2001:db8:101::11/64
+	$IP -6 route show | grep -q "src 2001:db8:101::11 "
+	log_test $? 0 "Prefsrc not removed when src address exist on other device"
+
+	$IP addr del dev dummy2 2001:db8:101::11/64
+	$IP -6 route show | grep -q "src 2001:db8:101::11 "
+	log_test $? 1 "Prefsrc removed when src address removed on all devices"
+
+	echo "    VRF with single device using src address"
+
+	$IP addr del dev dummy4 2001:db8:101::10/64
+	$IP -6 route show vrf red | grep -q "src 2001:db8:101::10 "
+	log_test $? 1 "Prefsrc removed when src address removed on other device"
+
+	echo "    VRF with two devices using src address"
+
+	$IP addr del dev dummy4 2001:db8:101::11/64
+	$IP -6 route show vrf red | grep -q "src 2001:db8:101::11 "
+	log_test $? 0 "Prefsrc not removed when src address exist on other device"
+
+	$IP addr del dev dummy5 2001:db8:101::11/64
+	$IP -6 route show vrf red | grep -q "src 2001:db8:101::11 "
+	log_test $? 1 "Prefsrc removed when src address removed on all devices"
+
+	echo "    src address and nexthop dev in same VRF"
+
+	$IP addr del dev dummy4 2001:db8:101::12/64
+	$IP -6 route show vrf red | grep -q "src 2001:db8:101::12 "
+	log_test $? 1 "Prefsrc removed from VRF when source address deleted"
+	$IP -6 route show | grep -q " src 2001:db8:101::12 "
+	log_test $? 0 "Prefsrc in default VRF not removed"
+
+	$IP addr add dev dummy4 2001:db8:101::12/64
+	$IP route replace vrf red 2001:db8:112::/64 dev dummy6 src 2001:db8:101::12
+	$IP addr del dev dummy1 2001:db8:101::12/64
+	$IP -6 route show vrf red | grep -q "src 2001:db8:101::12 "
+	log_test $? 0 "Prefsrc not removed from VRF when source address exist"
+	$IP -6 route show | grep -q " src 2001:db8:101::12 "
+	log_test $? 1 "Prefsrc in default VRF removed"
+
+	echo "    src address and nexthop device in different VRF"
+
+	$IP addr del dev dummy4 2001:db8:101::13/64
+	$IP -6 route show vrf red | grep -q "src 2001:db8:101::13 "
+	log_test $? 0 "Prefsrc not removed from VRF when nexthop dev in diff VRF"
+	$IP -6 route show | grep -q "src 2001:db8:101::13 "
+	log_test $? 0 "Prefsrc not removed in default VRF"
+
+	$IP addr add dev dummy4 2001:db8:101::13/64
+	$IP addr del dev dummy1 2001:db8:101::13/64
+	$IP -6 route show vrf red | grep -q "src 2001:db8:101::13 "
+	log_test $? 1 "Prefsrc removed from VRF when nexthop dev in diff VRF"
+	$IP -6 route show | grep -q "src 2001:db8:101::13 "
+	log_test $? 1 "Prefsrc removed in default VRF"
+
+	echo "    Table ID 0"
+
+	$IP addr del dev dummy1 2001:db8:101::15/64
+	$IP -6 route show | grep -q "src 2001:db8:101::15"
+	log_test $? 1 "Prefsrc removed from default VRF when source address deleted"
+
+	echo "    Link local source route"
+	$IP addr del dev dummy1 fe80::1/128
+	$IP -6 route show | grep -q "2001:db8:116::/64 dev dummy2 src fe80::1"
+	log_test $? 0 "Prefsrc not removed when delete ll addr from other dev"
+	$IP addr del dev dummy2 fe80::1/128
+	$IP -6 route show | grep -q "2001:db8:116::/64 dev dummy2 src fe80::1"
+	log_test $? 1 "Prefsrc removed when delete ll addr"
+	$IP -6 route show | grep -q "2001:db8:117::/64 dev dummy3 src fe80::1"
+	log_test $? 0 "Prefsrc not removed when delete ll addr from other dev"
+	$IP addr add dev dummy1 fe80::1/128
+	$IP addr del dev dummy3 fe80::1/128
+	$IP -6 route show | grep -q "2001:db8:117::/64 dev dummy3 src fe80::1"
+	log_test $? 1 "Prefsrc removed even ll addr still exist on other dev"
+
+	for i in $(seq 6); do
+		$IP li del dummy${i}
+	done
+	cleanup
+}
 
 ipv4_route_v6_gw_test()
 {
@@ -1609,7 +2199,7 @@ ipv4_route_v6_gw_test()
 		check_route "172.16.104.0/24 via inet6 2001:db8:101::2 dev veth1"
 	fi
 
-	run_cmd "ip netns exec ns1 ping -w1 -c1 172.16.104.1"
+	run_cmd "ip netns exec $ns1 ping -w1 -c1 172.16.104.1"
 	log_test $rc 0 "Single path route with IPv6 gateway - ping"
 
 	run_cmd "$IP ro del 172.16.104.0/24 via inet6 2001:db8:101::2"
@@ -1654,6 +2244,362 @@ ipv4_route_v6_gw_test()
 	route_cleanup
 }
 
+socat_check()
+{
+	if [ ! -x "$(command -v socat)" ]; then
+		echo "socat command not found. Skipping test"
+		return 1
+	fi
+
+	return 0
+}
+
+iptables_check()
+{
+	iptables -t mangle -L OUTPUT &> /dev/null
+	if [ $? -ne 0 ]; then
+		echo "iptables configuration not supported. Skipping test"
+		return 1
+	fi
+
+	return 0
+}
+
+ip6tables_check()
+{
+	ip6tables -t mangle -L OUTPUT &> /dev/null
+	if [ $? -ne 0 ]; then
+		echo "ip6tables configuration not supported. Skipping test"
+		return 1
+	fi
+
+	return 0
+}
+
+ipv4_mangle_test()
+{
+	local rc
+
+	echo
+	echo "IPv4 mangling tests"
+
+	socat_check || return 1
+	iptables_check || return 1
+
+	route_setup
+	sleep 2
+
+	local tmp_file=$(mktemp)
+	ip netns exec $ns2 socat UDP4-LISTEN:54321,fork $tmp_file &
+
+	# Add a FIB rule and a route that will direct our connection to the
+	# listening server.
+	$IP rule add pref 100 ipproto udp sport 12345 dport 54321 table 123
+	$IP route add table 123 172.16.101.0/24 dev veth1
+
+	# Add an unreachable route to the main table that will block our
+	# connection in case the FIB rule is not hit.
+	$IP route add unreachable 172.16.101.2/32
+
+	run_cmd "echo a | $NS_EXEC socat STDIN UDP4:172.16.101.2:54321,sourceport=12345"
+	log_test $? 0 "    Connection with correct parameters"
+
+	run_cmd "echo a | $NS_EXEC socat STDIN UDP4:172.16.101.2:54321,sourceport=11111"
+	log_test $? 1 "    Connection with incorrect parameters"
+
+	# Add a mangling rule and make sure connection is still successful.
+	$NS_EXEC iptables -t mangle -A OUTPUT -j MARK --set-mark 1
+
+	run_cmd "echo a | $NS_EXEC socat STDIN UDP4:172.16.101.2:54321,sourceport=12345"
+	log_test $? 0 "    Connection with correct parameters - mangling"
+
+	# Delete the mangling rule and make sure connection is still
+	# successful.
+	$NS_EXEC iptables -t mangle -D OUTPUT -j MARK --set-mark 1
+
+	run_cmd "echo a | $NS_EXEC socat STDIN UDP4:172.16.101.2:54321,sourceport=12345"
+	log_test $? 0 "    Connection with correct parameters - no mangling"
+
+	# Verify connections were indeed successful on server side.
+	[[ $(cat $tmp_file | wc -l) -eq 3 ]]
+	log_test $? 0 "    Connection check - server side"
+
+	$IP route del unreachable 172.16.101.2/32
+	$IP route del table 123 172.16.101.0/24 dev veth1
+	$IP rule del pref 100
+
+	{ kill %% && wait %%; } 2>/dev/null
+	rm $tmp_file
+
+	route_cleanup
+}
+
+ipv6_mangle_test()
+{
+	local rc
+
+	echo
+	echo "IPv6 mangling tests"
+
+	socat_check || return 1
+	ip6tables_check || return 1
+
+	route_setup
+	sleep 2
+
+	local tmp_file=$(mktemp)
+	ip netns exec $ns2 socat UDP6-LISTEN:54321,fork $tmp_file &
+
+	# Add a FIB rule and a route that will direct our connection to the
+	# listening server.
+	$IP -6 rule add pref 100 ipproto udp sport 12345 dport 54321 table 123
+	$IP -6 route add table 123 2001:db8:101::/64 dev veth1
+
+	# Add an unreachable route to the main table that will block our
+	# connection in case the FIB rule is not hit.
+	$IP -6 route add unreachable 2001:db8:101::2/128
+
+	run_cmd "echo a | $NS_EXEC socat STDIN UDP6:[2001:db8:101::2]:54321,sourceport=12345"
+	log_test $? 0 "    Connection with correct parameters"
+
+	run_cmd "echo a | $NS_EXEC socat STDIN UDP6:[2001:db8:101::2]:54321,sourceport=11111"
+	log_test $? 1 "    Connection with incorrect parameters"
+
+	# Add a mangling rule and make sure connection is still successful.
+	$NS_EXEC ip6tables -t mangle -A OUTPUT -j MARK --set-mark 1
+
+	run_cmd "echo a | $NS_EXEC socat STDIN UDP6:[2001:db8:101::2]:54321,sourceport=12345"
+	log_test $? 0 "    Connection with correct parameters - mangling"
+
+	# Delete the mangling rule and make sure connection is still
+	# successful.
+	$NS_EXEC ip6tables -t mangle -D OUTPUT -j MARK --set-mark 1
+
+	run_cmd "echo a | $NS_EXEC socat STDIN UDP6:[2001:db8:101::2]:54321,sourceport=12345"
+	log_test $? 0 "    Connection with correct parameters - no mangling"
+
+	# Verify connections were indeed successful on server side.
+	[[ $(cat $tmp_file | wc -l) -eq 3 ]]
+	log_test $? 0 "    Connection check - server side"
+
+	$IP -6 route del unreachable 2001:db8:101::2/128
+	$IP -6 route del table 123 2001:db8:101::/64 dev veth1
+	$IP -6 rule del pref 100
+
+	{ kill %% && wait %%; } 2>/dev/null
+	rm $tmp_file
+
+	route_cleanup
+}
+
+ip_neigh_get_check()
+{
+	ip neigh help 2>&1 | grep -q 'ip neigh get'
+	if [ $? -ne 0 ]; then
+		echo "iproute2 command does not support neigh get. Skipping test"
+		return 1
+	fi
+
+	return 0
+}
+
+ipv4_bcast_neigh_test()
+{
+	local rc
+
+	echo
+	echo "IPv4 broadcast neighbour tests"
+
+	ip_neigh_get_check || return 1
+
+	setup
+
+	set -e
+	run_cmd "$IP neigh add 192.0.2.111 lladdr 00:11:22:33:44:55 nud perm dev dummy0"
+	run_cmd "$IP neigh add 192.0.2.255 lladdr 00:11:22:33:44:55 nud perm dev dummy0"
+
+	run_cmd "$IP neigh get 192.0.2.111 dev dummy0"
+	run_cmd "$IP neigh get 192.0.2.255 dev dummy0"
+
+	run_cmd "$IP address add 192.0.2.1/24 broadcast 192.0.2.111 dev dummy0"
+
+	run_cmd "$IP neigh add 203.0.113.111 nud failed dev dummy0"
+	run_cmd "$IP neigh add 203.0.113.255 nud failed dev dummy0"
+
+	run_cmd "$IP neigh get 203.0.113.111 dev dummy0"
+	run_cmd "$IP neigh get 203.0.113.255 dev dummy0"
+
+	run_cmd "$IP address add 203.0.113.1/24 broadcast 203.0.113.111 dev dummy0"
+	set +e
+
+	run_cmd "$IP neigh get 192.0.2.111 dev dummy0"
+	log_test $? 0 "Resolved neighbour for broadcast address"
+
+	run_cmd "$IP neigh get 192.0.2.255 dev dummy0"
+	log_test $? 0 "Resolved neighbour for network broadcast address"
+
+	run_cmd "$IP neigh get 203.0.113.111 dev dummy0"
+	log_test $? 2 "Unresolved neighbour for broadcast address"
+
+	run_cmd "$IP neigh get 203.0.113.255 dev dummy0"
+	log_test $? 2 "Unresolved neighbour for network broadcast address"
+
+	cleanup
+}
+
+mpath_dep_check()
+{
+	if [ ! -x "$(command -v mausezahn)" ]; then
+		echo "mausezahn command not found. Skipping test"
+		return 1
+	fi
+
+	if [ ! -x "$(command -v jq)" ]; then
+		echo "jq command not found. Skipping test"
+		return 1
+	fi
+
+	if [ ! -x "$(command -v bc)" ]; then
+		echo "bc command not found. Skipping test"
+		return 1
+	fi
+
+	if [ ! -x "$(command -v perf)" ]; then
+		echo "perf command not found. Skipping test"
+		return 1
+	fi
+
+	perf list fib:* | grep -q fib_table_lookup
+	if [ $? -ne 0 ]; then
+		echo "IPv4 FIB tracepoint not found. Skipping test"
+		return 1
+	fi
+
+	perf list fib6:* | grep -q fib6_table_lookup
+	if [ $? -ne 0 ]; then
+		echo "IPv6 FIB tracepoint not found. Skipping test"
+		return 1
+	fi
+
+	return 0
+}
+
+link_stats_get()
+{
+	local ns=$1; shift
+	local dev=$1; shift
+	local dir=$1; shift
+	local stat=$1; shift
+
+	ip -n $ns -j -s link show dev $dev \
+		| jq '.[]["stats64"]["'$dir'"]["'$stat'"]'
+}
+
+list_rcv_eval()
+{
+	local file=$1; shift
+	local expected=$1; shift
+
+	local count=$(tail -n 1 $file | jq '.["counter-value"] | tonumber | floor')
+	local ratio=$(echo "scale=2; $count / $expected" | bc -l)
+	local res=$(echo "$ratio >= 0.95" | bc)
+	[[ $res -eq 1 ]]
+	log_test $? 0 "Multipath route hit ratio ($ratio)"
+}
+
+ipv4_mpath_list_test()
+{
+	echo
+	echo "IPv4 multipath list receive tests"
+
+	mpath_dep_check || return 1
+
+	route_setup
+
+	set -e
+	run_cmd "ip netns exec $ns1 ethtool -K veth1 tcp-segmentation-offload off"
+
+	run_cmd "ip netns exec $ns2 bash -c \"echo 20000 > /sys/class/net/veth2/gro_flush_timeout\""
+	run_cmd "ip netns exec $ns2 bash -c \"echo 1 > /sys/class/net/veth2/napi_defer_hard_irqs\""
+	run_cmd "ip netns exec $ns2 ethtool -K veth2 generic-receive-offload on"
+	run_cmd "ip -n $ns2 link add name nh1 up type dummy"
+	run_cmd "ip -n $ns2 link add name nh2 up type dummy"
+	run_cmd "ip -n $ns2 address add 172.16.201.1/24 dev nh1"
+	run_cmd "ip -n $ns2 address add 172.16.202.1/24 dev nh2"
+	run_cmd "ip -n $ns2 neigh add 172.16.201.2 lladdr 00:11:22:33:44:55 nud perm dev nh1"
+	run_cmd "ip -n $ns2 neigh add 172.16.202.2 lladdr 00:aa:bb:cc:dd:ee nud perm dev nh2"
+	run_cmd "ip -n $ns2 route add 203.0.113.0/24
+		nexthop via 172.16.201.2 nexthop via 172.16.202.2"
+	run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.fib_multipath_hash_policy=1"
+	run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.veth2.rp_filter=0"
+	run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0"
+	run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.default.rp_filter=0"
+	set +e
+
+	local dmac=$(ip -n $ns2 -j link show dev veth2 | jq -r '.[]["address"]')
+	local tmp_file=$(mktemp)
+	local cmd="ip netns exec $ns1 mausezahn veth1 -a own -b $dmac
+		-A 172.16.101.1 -B 203.0.113.1 -t udp 'sp=12345,dp=0-65535' -q"
+
+	# Packets forwarded in a list using a multipath route must not reuse a
+	# cached result so that a flow always hits the same nexthop. In other
+	# words, the FIB lookup tracepoint needs to be triggered for every
+	# packet.
+	local t0_rx_pkts=$(link_stats_get $ns2 veth2 rx packets)
+	run_cmd "perf stat -a -e fib:fib_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd"
+	local t1_rx_pkts=$(link_stats_get $ns2 veth2 rx packets)
+	local diff=$(echo $t1_rx_pkts - $t0_rx_pkts | bc -l)
+	list_rcv_eval $tmp_file $diff
+
+	rm $tmp_file
+	route_cleanup
+}
+
+ipv6_mpath_list_test()
+{
+	echo
+	echo "IPv6 multipath list receive tests"
+
+	mpath_dep_check || return 1
+
+	route_setup
+
+	set -e
+	run_cmd "ip netns exec $ns1 ethtool -K veth1 tcp-segmentation-offload off"
+
+	run_cmd "ip netns exec $ns2 bash -c \"echo 20000 > /sys/class/net/veth2/gro_flush_timeout\""
+	run_cmd "ip netns exec $ns2 bash -c \"echo 1 > /sys/class/net/veth2/napi_defer_hard_irqs\""
+	run_cmd "ip netns exec $ns2 ethtool -K veth2 generic-receive-offload on"
+	run_cmd "ip -n $ns2 link add name nh1 up type dummy"
+	run_cmd "ip -n $ns2 link add name nh2 up type dummy"
+	run_cmd "ip -n $ns2 -6 address add 2001:db8:201::1/64 dev nh1"
+	run_cmd "ip -n $ns2 -6 address add 2001:db8:202::1/64 dev nh2"
+	run_cmd "ip -n $ns2 -6 neigh add 2001:db8:201::2 lladdr 00:11:22:33:44:55 nud perm dev nh1"
+	run_cmd "ip -n $ns2 -6 neigh add 2001:db8:202::2 lladdr 00:aa:bb:cc:dd:ee nud perm dev nh2"
+	run_cmd "ip -n $ns2 -6 route add 2001:db8:301::/64
+		nexthop via 2001:db8:201::2 nexthop via 2001:db8:202::2"
+	run_cmd "ip netns exec $ns2 sysctl -qw net.ipv6.fib_multipath_hash_policy=1"
+	set +e
+
+	local dmac=$(ip -n $ns2 -j link show dev veth2 | jq -r '.[]["address"]')
+	local tmp_file=$(mktemp)
+	local cmd="ip netns exec $ns1 mausezahn -6 veth1 -a own -b $dmac
+		-A 2001:db8:101::1 -B 2001:db8:301::1 -t udp 'sp=12345,dp=0-65535' -q"
+
+	# Packets forwarded in a list using a multipath route must not reuse a
+	# cached result so that a flow always hits the same nexthop. In other
+	# words, the FIB lookup tracepoint needs to be triggered for every
+	# packet.
+	local t0_rx_pkts=$(link_stats_get $ns2 veth2 rx packets)
+	run_cmd "perf stat -a -e fib6:fib6_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd"
+	local t1_rx_pkts=$(link_stats_get $ns2 veth2 rx packets)
+	local diff=$(echo $t1_rx_pkts - $t0_rx_pkts | bc -l)
+	list_rcv_eval $tmp_file $diff
+
+	rm $tmp_file
+	route_cleanup
+}
+
 ################################################################################
 # usage
 
@@ -1673,6 +2619,8 @@ EOF
 ################################################################################
 # main
 
+trap cleanup EXIT
+
 while getopts :t:pPhv o
 do
 	case $o in
@@ -1717,15 +2665,24 @@ do
 	fib_carrier_test|carrier)	fib_carrier_test;;
 	fib_rp_filter_test|rp_filter)	fib_rp_filter_test;;
 	fib_nexthop_test|nexthop)	fib_nexthop_test;;
+	fib_notify_test|ipv4_notify)	fib_notify_test;;
+	fib6_notify_test|ipv6_notify)	fib6_notify_test;;
 	fib_suppress_test|suppress)	fib_suppress_test;;
 	ipv6_route_test|ipv6_rt)	ipv6_route_test;;
 	ipv4_route_test|ipv4_rt)	ipv4_route_test;;
 	ipv6_addr_metric)		ipv6_addr_metric_test;;
 	ipv4_addr_metric)		ipv4_addr_metric_test;;
 	ipv4_del_addr)			ipv4_del_addr_test;;
+	ipv6_del_addr)			ipv6_del_addr_test;;
 	ipv6_route_metrics)		ipv6_route_metrics_test;;
 	ipv4_route_metrics)		ipv4_route_metrics_test;;
 	ipv4_route_v6_gw)		ipv4_route_v6_gw_test;;
+	ipv4_mangle)			ipv4_mangle_test;;
+	ipv6_mangle)			ipv6_mangle_test;;
+	ipv4_bcast_neigh)		ipv4_bcast_neigh_test;;
+	fib6_gc_test|ipv6_gc)		fib6_gc_test;;
+	ipv4_mpath_list)		ipv4_mpath_list_test;;
+	ipv6_mpath_list)		ipv6_mpath_list_test;;
 
 	help) echo "Test names: $TESTS"; exit 0;;
 	esac
diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index 250fbb2d1625..535865b3d1d6 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -1,14 +1,40 @@
 # SPDX-License-Identifier: GPL-2.0+ OR MIT
 
-TEST_PROGS = bridge_igmp.sh \
+TEST_PROGS = bridge_fdb_learning_limit.sh \
+	bridge_igmp.sh \
+	bridge_locked_port.sh \
+	bridge_mdb.sh \
+	bridge_mdb_host.sh \
+	bridge_mdb_max.sh \
+	bridge_mdb_port_down.sh \
+	bridge_mld.sh \
 	bridge_port_isolation.sh \
 	bridge_sticky_fdb.sh \
 	bridge_vlan_aware.sh \
+	bridge_vlan_mcast.sh \
 	bridge_vlan_unaware.sh \
+	custom_multipath_hash.sh \
+	dual_vxlan_bridge.sh \
+	ethtool_extended_state.sh \
+	ethtool_mm.sh \
+	ethtool_rmon.sh \
 	ethtool.sh \
+	gre_custom_multipath_hash.sh \
 	gre_inner_v4_multipath.sh \
 	gre_inner_v6_multipath.sh \
+	gre_multipath_nh_res.sh \
+	gre_multipath_nh.sh \
 	gre_multipath.sh \
+	hw_stats_l3.sh \
+	hw_stats_l3_gre.sh \
+	ip6_forward_instats_vrf.sh \
+	ip6gre_custom_multipath_hash.sh \
+	ip6gre_flat_key.sh \
+	ip6gre_flat_keys.sh \
+	ip6gre_flat.sh \
+	ip6gre_hier_key.sh \
+	ip6gre_hier_keys.sh \
+	ip6gre_hier.sh \
 	ip6gre_inner_v4_multipath.sh \
 	ip6gre_inner_v6_multipath.sh \
 	ipip_flat_gre_key.sh \
@@ -17,6 +43,7 @@ TEST_PROGS = bridge_igmp.sh \
 	ipip_hier_gre_key.sh \
 	ipip_hier_gre_keys.sh \
 	ipip_hier_gre.sh \
+	local_termination.sh \
 	loopback.sh \
 	mirror_gre_bound.sh \
 	mirror_gre_bridge_1d.sh \
@@ -32,45 +59,78 @@ TEST_PROGS = bridge_igmp.sh \
 	mirror_gre_vlan_bridge_1q.sh \
 	mirror_gre_vlan.sh \
 	mirror_vlan.sh \
+	no_forwarding.sh \
+	pedit_dsfield.sh \
+	pedit_ip.sh \
+	pedit_l4port.sh \
+	q_in_vni_ipv6.sh \
+	q_in_vni.sh \
 	router_bridge.sh \
+	router_bridge_1d.sh \
+	router_bridge_1d_lag.sh \
+	router_bridge_lag.sh \
 	router_bridge_vlan.sh \
+	router_bridge_vlan_upper.sh \
+	router_bridge_pvid_vlan_upper.sh \
+	router_bridge_vlan_upper_pvid.sh \
 	router_broadcast.sh \
+	router_mpath_nh_res.sh \
 	router_mpath_nh.sh \
 	router_multicast.sh \
 	router_multipath.sh \
+	router_nh.sh \
 	router.sh \
 	router_vid_1.sh \
 	sch_ets.sh \
+	sch_red.sh \
 	sch_tbf_ets.sh \
 	sch_tbf_prio.sh \
 	sch_tbf_root.sh \
+	skbedit_priority.sh \
 	tc_actions.sh \
 	tc_chains.sh \
 	tc_flower_router.sh \
 	tc_flower.sh \
+	tc_flower_l2_miss.sh \
+	tc_flower_cfm.sh \
+	tc_flower_port_range.sh \
+	tc_mpls_l2vpn.sh \
+	tc_police.sh \
 	tc_shblocks.sh \
+	tc_tunnel_key.sh \
 	tc_vlan_modify.sh \
+	vxlan_asymmetric_ipv6.sh \
 	vxlan_asymmetric.sh \
+	vxlan_bridge_1d_ipv6.sh \
+	vxlan_bridge_1d_port_8472_ipv6.sh \
 	vxlan_bridge_1d_port_8472.sh \
 	vxlan_bridge_1d.sh \
+	vxlan_bridge_1q_ipv6.sh \
+	vxlan_bridge_1q_port_8472_ipv6.sh \
 	vxlan_bridge_1q_port_8472.sh \
 	vxlan_bridge_1q.sh \
+	vxlan_symmetric_ipv6.sh \
 	vxlan_symmetric.sh
 
-TEST_PROGS_EXTENDED := devlink_lib.sh \
+TEST_FILES := devlink_lib.sh \
 	ethtool_lib.sh \
 	fib_offload_lib.sh \
 	forwarding.config.sample \
+	ip6gre_lib.sh \
 	ipip_lib.sh \
 	lib.sh \
 	mirror_gre_lib.sh \
 	mirror_gre_topo_lib.sh \
 	mirror_lib.sh \
 	mirror_topo_lib.sh \
+	router_mpath_nh_lib.sh \
 	sch_ets_core.sh \
 	sch_ets_tests.sh \
 	sch_tbf_core.sh \
 	sch_tbf_etsprio.sh \
 	tc_common.sh
 
+TEST_INCLUDES := \
+	../lib.sh
+
 include ../../lib.mk
diff --git a/tools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh b/tools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh
new file mode 100755
index 000000000000..0760a34b7114
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh
@@ -0,0 +1,283 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# ShellCheck incorrectly believes that most of the code here is unreachable
+# because it's invoked by variable name following ALL_TESTS.
+#
+# shellcheck disable=SC2317
+
+ALL_TESTS="check_accounting check_limit"
+NUM_NETIFS=6
+source lib.sh
+
+TEST_MAC_BASE=de:ad:be:ef:42:
+
+NUM_PKTS=16
+FDB_LIMIT=8
+
+FDB_TYPES=(
+	# name		is counted?	overrides learned?
+	'learned	1		0'
+	'static		0		1'
+	'user		0		1'
+	'extern_learn	0		1'
+	'local		0		1'
+)
+
+mac()
+{
+	printf "${TEST_MAC_BASE}%02x" "$1"
+}
+
+H1_DEFAULT_MAC=$(mac 42)
+
+switch_create()
+{
+	ip link add dev br0 type bridge
+
+	ip link set dev "$swp1" master br0
+	ip link set dev "$swp2" master br0
+	# swp3 is used to add local MACs, so do not add it to the bridge yet.
+
+	# swp2 is only used for replying when learning on swp1, its MAC should not be learned.
+	ip link set dev "$swp2" type bridge_slave learning off
+
+	ip link set dev br0 up
+
+	ip link set dev "$swp1" up
+	ip link set dev "$swp2" up
+	ip link set dev "$swp3" up
+}
+
+switch_destroy()
+{
+	ip link set dev "$swp3" down
+	ip link set dev "$swp2" down
+	ip link set dev "$swp1" down
+
+	ip link del dev br0
+}
+
+h_create()
+{
+	ip link set "$h1" addr "$H1_DEFAULT_MAC"
+
+	simple_if_init "$h1" 192.0.2.1/24
+	simple_if_init "$h2" 192.0.2.2/24
+}
+
+h_destroy()
+{
+	simple_if_fini "$h1" 192.0.2.1/24
+	simple_if_fini "$h2" 192.0.2.2/24
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	h2=${NETIFS[p3]}
+	swp2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p6]}
+
+	vrf_prepare
+
+	h_create
+
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+
+	h_destroy
+
+	vrf_cleanup
+}
+
+fdb_get_n_learned()
+{
+	ip -d -j link show dev br0 type bridge | \
+		jq '.[]["linkinfo"]["info_data"]["fdb_n_learned"]'
+}
+
+fdb_get_n_mac()
+{
+	local mac=${1}
+
+	bridge -j fdb show br br0 | \
+		jq "map(select(.mac == \"${mac}\" and (has(\"vlan\") | not))) | length"
+}
+
+fdb_fill_learned()
+{
+	local i
+
+	for i in $(seq 1 "$NUM_PKTS"); do
+		fdb_add learned "$(mac "$i")"
+	done
+}
+
+fdb_reset()
+{
+	bridge fdb flush dev br0
+
+	# Keep the default MAC address of h1 in the table. We set it to a different one when
+	# testing dynamic learning.
+	bridge fdb add "$H1_DEFAULT_MAC" dev "$swp1" master static use
+}
+
+fdb_add()
+{
+	local type=$1 mac=$2
+
+	case "$type" in
+		learned)
+			ip link set "$h1" addr "$mac"
+			# Wait for a reply so we implicitly wait until after the forwarding
+			# code finished and the FDB entry was created.
+			PING_COUNT=1 ping_do "$h1" 192.0.2.2
+			check_err $? "Failed to ping another bridge port"
+			ip link set "$h1" addr "$H1_DEFAULT_MAC"
+			;;
+		local)
+			ip link set dev "$swp3" addr "$mac" && ip link set "$swp3" master br0
+			;;
+		static)
+			bridge fdb replace "$mac" dev "$swp1" master static
+			;;
+		user)
+			bridge fdb replace "$mac" dev "$swp1" master static use
+			;;
+		extern_learn)
+			bridge fdb replace "$mac" dev "$swp1" master extern_learn
+			;;
+	esac
+
+	check_err $? "Failed to add a FDB entry of type ${type}"
+}
+
+fdb_del()
+{
+	local type=$1 mac=$2
+
+	case "$type" in
+		local)
+			ip link set "$swp3" nomaster
+			;;
+		*)
+			bridge fdb del "$mac" dev "$swp1" master
+			;;
+	esac
+
+	check_err $? "Failed to remove a FDB entry of type ${type}"
+}
+
+check_accounting_one_type()
+{
+	local type=$1 is_counted=$2 overrides_learned=$3
+	shift 3
+	RET=0
+
+	fdb_reset
+	fdb_add "$type" "$(mac 0)"
+	learned=$(fdb_get_n_learned)
+	[ "$learned" -ne "$is_counted" ]
+	check_fail $? "Inserted FDB type ${type}: Expected the count ${is_counted}, but got ${learned}"
+
+	fdb_del "$type" "$(mac 0)"
+	learned=$(fdb_get_n_learned)
+	[ "$learned" -ne 0 ]
+	check_fail $? "Removed FDB type ${type}: Expected the count 0, but got ${learned}"
+
+	if [ "$overrides_learned" -eq 1 ]; then
+		fdb_reset
+		fdb_add learned "$(mac 0)"
+		fdb_add "$type" "$(mac 0)"
+		learned=$(fdb_get_n_learned)
+		[ "$learned" -ne "$is_counted" ]
+		check_fail $? "Set a learned entry to FDB type ${type}: Expected the count ${is_counted}, but got ${learned}"
+		fdb_del "$type" "$(mac 0)"
+	fi
+
+	log_test "FDB accounting interacting with FDB type ${type}"
+}
+
+check_accounting()
+{
+	local type_args learned
+	RET=0
+
+	fdb_reset
+	learned=$(fdb_get_n_learned)
+	[ "$learned" -ne 0 ]
+	check_fail $? "Flushed the FDB table: Expected the count 0, but got ${learned}"
+
+	fdb_fill_learned
+	sleep 1
+
+	learned=$(fdb_get_n_learned)
+	[ "$learned" -ne "$NUM_PKTS" ]
+	check_fail $? "Filled the FDB table: Expected the count ${NUM_PKTS}, but got ${learned}"
+
+	log_test "FDB accounting"
+
+	for type_args in "${FDB_TYPES[@]}"; do
+		# This is intentional use of word splitting.
+		# shellcheck disable=SC2086
+		check_accounting_one_type $type_args
+	done
+}
+
+check_limit_one_type()
+{
+	local type=$1 is_counted=$2
+	local n_mac expected=$((1 - is_counted))
+	RET=0
+
+	fdb_reset
+	fdb_fill_learned
+
+	fdb_add "$type" "$(mac 0)"
+	n_mac=$(fdb_get_n_mac "$(mac 0)")
+	[ "$n_mac" -ne "$expected" ]
+	check_fail $? "Inserted FDB type ${type} at limit: Expected the count ${expected}, but got ${n_mac}"
+
+	log_test "FDB limits interacting with FDB type ${type}"
+}
+
+check_limit()
+{
+	local learned
+	RET=0
+
+	ip link set br0 type bridge fdb_max_learned "$FDB_LIMIT"
+
+	fdb_reset
+	fdb_fill_learned
+
+	learned=$(fdb_get_n_learned)
+	[ "$learned" -ne "$FDB_LIMIT" ]
+	check_fail $? "Filled the limited FDB table: Expected the count ${FDB_LIMIT}, but got ${learned}"
+
+	log_test "FDB limits"
+
+	for type_args in "${FDB_TYPES[@]}"; do
+		# This is intentional use of word splitting.
+		# shellcheck disable=SC2086
+		check_limit_one_type $type_args
+	done
+}
+
+trap cleanup EXIT
+
+setup_prepare
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index 88d2472ba151..2aa66d2a1702 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -1,11 +1,37 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-ALL_TESTS="reportleave_test"
+ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \
+	   v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test v3exc_is_include_test \
+	   v3exc_is_exclude_test v3exc_to_exclude_test v3inc_block_test v3exc_block_test \
+	   v3exc_timeout_test v3star_ex_auto_add_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="239.10.10.10"
 TEST_GROUP_MAC="01:00:5e:0a:0a:0a"
+
+ALL_GROUP="224.0.0.1"
+ALL_MAC="01:00:5e:00:00:01"
+
+# IGMPv3 is_in report: grp 239.10.10.10 is_include 192.0.2.1,192.0.2.2,192.0.2.3
+MZPKT_IS_INC="22:00:9d:de:00:00:00:01:01:00:00:03:ef:0a:0a:0a:c0:00:02:01:c0:00:02:02:c0:00:02:03"
+# IGMPv3 is_in report: grp 239.10.10.10 is_include 192.0.2.10,192.0.2.11,192.0.2.12
+MZPKT_IS_INC2="22:00:9d:c3:00:00:00:01:01:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00:02:0b:c0:00:02:0c"
+# IGMPv3 is_in report: grp 239.10.10.10 is_include 192.0.2.20,192.0.2.30
+MZPKT_IS_INC3="22:00:5f:b4:00:00:00:01:01:00:00:02:ef:0a:0a:0a:c0:00:02:14:c0:00:02:1e"
+# IGMPv3 allow report: grp 239.10.10.10 allow 192.0.2.10,192.0.2.11,192.0.2.12
+MZPKT_ALLOW="22:00:99:c3:00:00:00:01:05:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00:02:0b:c0:00:02:0c"
+# IGMPv3 allow report: grp 239.10.10.10 allow 192.0.2.20,192.0.2.30
+MZPKT_ALLOW2="22:00:5b:b4:00:00:00:01:05:00:00:02:ef:0a:0a:0a:c0:00:02:14:c0:00:02:1e"
+# IGMPv3 is_ex report: grp 239.10.10.10 is_exclude 192.0.2.1,192.0.2.2,192.0.2.20,192.0.2.21
+MZPKT_IS_EXC="22:00:da:b6:00:00:00:01:02:00:00:04:ef:0a:0a:0a:c0:00:02:01:c0:00:02:02:c0:00:02:14:c0:00:02:15"
+# IGMPv3 is_ex report: grp 239.10.10.10 is_exclude 192.0.2.20,192.0.2.30
+MZPKT_IS_EXC2="22:00:5e:b4:00:00:00:01:02:00:00:02:ef:0a:0a:0a:c0:00:02:14:c0:00:02:1e"
+# IGMPv3 to_ex report: grp 239.10.10.10 to_exclude 192.0.2.1,192.0.2.20,192.0.2.30
+MZPKT_TO_EXC="22:00:9a:b1:00:00:00:01:04:00:00:03:ef:0a:0a:0a:c0:00:02:01:c0:00:02:14:c0:00:02:1e"
+# IGMPv3 block report: grp 239.10.10.10 block 192.0.2.1,192.0.2.20,192.0.2.30
+MZPKT_BLOCK="22:00:98:b1:00:00:00:01:06:00:00:03:ef:0a:0a:0a:c0:00:02:01:c0:00:02:14:c0:00:02:1e"
+
 source lib.sh
 
 h1_create()
@@ -70,47 +96,13 @@ cleanup()
 
 	switch_destroy
 
-	# Always cleanup the mcast group
-	ip address del dev $h2 $TEST_GROUP/32 2>&1 1>/dev/null
-
 	h2_destroy
 	h1_destroy
 
 	vrf_cleanup
 }
 
-# return 0 if the packet wasn't seen on host2_if or 1 if it was
-mcast_packet_test()
-{
-	local mac=$1
-	local ip=$2
-	local host1_if=$3
-	local host2_if=$4
-	local seen=0
-
-	# Add an ACL on `host2_if` which will tell us whether the packet
-	# was received by it or not.
-	tc qdisc add dev $host2_if ingress
-	tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \
-		flower dst_mac $mac action drop
-
-	$MZ $host1_if -c 1 -p 64 -b $mac -B $ip -t udp "dp=4096,sp=2048" -q
-	sleep 1
-
-	tc -j -s filter show dev $host2_if ingress \
-		| jq -e ".[] | select(.options.handle == 101) \
-		| select(.options.actions[0].stats.packets == 1)" &> /dev/null
-	if [[ $? -eq 0 ]]; then
-		seen=1
-	fi
-
-	tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower
-	tc qdisc del dev $host2_if ingress
-
-	return $seen
-}
-
-reportleave_test()
+v2reportleave_test()
 {
 	RET=0
 	ip address add dev $h2 $TEST_GROUP/32 autojoin
@@ -118,12 +110,12 @@ reportleave_test()
 
 	sleep 5
 	bridge mdb show dev br0 | grep $TEST_GROUP 1>/dev/null
-	check_err $? "Report didn't create mdb entry for $TEST_GROUP"
+	check_err $? "IGMPv2 report didn't create mdb entry for $TEST_GROUP"
 
-	mcast_packet_test $TEST_GROUP_MAC $TEST_GROUP $h1 $h2
+	mcast_packet_test $TEST_GROUP_MAC 192.0.2.1 $TEST_GROUP $h1 $h2
 	check_fail $? "Traffic to $TEST_GROUP wasn't forwarded"
 
-	log_test "IGMP report $TEST_GROUP"
+	log_test "IGMPv2 report $TEST_GROUP"
 
 	RET=0
 	bridge mdb show dev br0 | grep $TEST_GROUP 1>/dev/null
@@ -136,10 +128,430 @@ reportleave_test()
 	bridge mdb show dev br0 | grep $TEST_GROUP 1>/dev/null
 	check_fail $? "Leave didn't delete mdb entry for $TEST_GROUP"
 
-	mcast_packet_test $TEST_GROUP_MAC $TEST_GROUP $h1 $h2
+	mcast_packet_test $TEST_GROUP_MAC 192.0.2.1 $TEST_GROUP $h1 $h2
 	check_err $? "Traffic to $TEST_GROUP was forwarded without mdb entry"
 
-	log_test "IGMP leave $TEST_GROUP"
+	log_test "IGMPv2 leave $TEST_GROUP"
+}
+
+v3include_prepare()
+{
+	local host1_if=$1
+	local mac=$2
+	local group=$3
+	local X=("192.0.2.1" "192.0.2.2" "192.0.2.3")
+
+	ip link set dev br0 type bridge mcast_igmp_version 3
+	check_err $? "Could not change bridge IGMP version to 3"
+
+	$MZ $host1_if -b $mac -c 1 -B $group -t ip "proto=2,p=$MZPKT_IS_INC" -q
+	sleep 1
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and .source_list != null)" &>/dev/null
+	check_err $? "Missing *,G entry with source list"
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and .filter_mode == \"include\")" &>/dev/null
+	check_err $? "Wrong *,G entry filter mode"
+	brmcast_check_sg_entries "is_include" "${X[@]}"
+}
+
+v3exclude_prepare()
+{
+	local host1_if=$1
+	local mac=$2
+	local group=$3
+	local pkt=$4
+	local X=("192.0.2.1" "192.0.2.2")
+	local Y=("192.0.2.20" "192.0.2.21")
+
+	v3include_prepare $host1_if $mac $group
+
+	$MZ $host1_if -c 1 -b $mac -B $group -t ip "proto=2,p=$MZPKT_IS_EXC" -q
+	sleep 1
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and .filter_mode == \"exclude\")" &>/dev/null
+	check_err $? "Wrong *,G entry filter mode"
+
+	brmcast_check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}"
+
+	brmcast_check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 1 "${Y[@]}"
+
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and
+				.source_list[].address == \"192.0.2.3\")" &>/dev/null
+	check_fail $? "Wrong *,G entry source list, 192.0.2.3 entry still exists"
+}
+
+v3cleanup()
+{
+	local port=$1
+	local group=$2
+
+	bridge mdb del dev br0 port $port grp $group
+	ip link set dev br0 type bridge mcast_igmp_version 2
+}
+
+v3include_test()
+{
+	RET=0
+	local X=("192.0.2.1" "192.0.2.2" "192.0.2.3")
+
+	v3include_prepare $h1 $ALL_MAC $ALL_GROUP
+
+	brmcast_check_sg_state 0 "${X[@]}"
+
+	brmcast_check_sg_fwding 1 "${X[@]}"
+	brmcast_check_sg_fwding 0 "192.0.2.100"
+
+	log_test "IGMPv3 report $TEST_GROUP is_include"
+
+	v3cleanup $swp1 $TEST_GROUP
+}
+
+v3inc_allow_test()
+{
+	RET=0
+	local X=("192.0.2.10" "192.0.2.11" "192.0.2.12")
+
+	v3include_prepare $h1 $ALL_MAC $ALL_GROUP
+
+	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_ALLOW" -q
+	sleep 1
+	brmcast_check_sg_entries "allow" "${X[@]}"
+
+	brmcast_check_sg_state 0 "${X[@]}"
+
+	brmcast_check_sg_fwding 1 "${X[@]}"
+	brmcast_check_sg_fwding 0 "192.0.2.100"
+
+	log_test "IGMPv3 report $TEST_GROUP include -> allow"
+
+	v3cleanup $swp1 $TEST_GROUP
+}
+
+v3inc_is_include_test()
+{
+	RET=0
+	local X=("192.0.2.10" "192.0.2.11" "192.0.2.12")
+
+	v3include_prepare $h1 $ALL_MAC $ALL_GROUP
+
+	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_INC2" -q
+	sleep 1
+	brmcast_check_sg_entries "is_include" "${X[@]}"
+
+	brmcast_check_sg_state 0 "${X[@]}"
+
+	brmcast_check_sg_fwding 1 "${X[@]}"
+	brmcast_check_sg_fwding 0 "192.0.2.100"
+
+	log_test "IGMPv3 report $TEST_GROUP include -> is_include"
+
+	v3cleanup $swp1 $TEST_GROUP
+}
+
+v3inc_is_exclude_test()
+{
+	RET=0
+
+	v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP
+
+	brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100
+	brmcast_check_sg_fwding 0 "${Y[@]}"
+
+	log_test "IGMPv3 report $TEST_GROUP include -> is_exclude"
+
+	v3cleanup $swp1 $TEST_GROUP
+}
+
+v3inc_to_exclude_test()
+{
+	RET=0
+	local X=("192.0.2.1")
+	local Y=("192.0.2.20" "192.0.2.30")
+
+	v3include_prepare $h1 $ALL_MAC $ALL_GROUP
+
+	ip link set dev br0 type bridge mcast_last_member_interval 500
+	check_err $? "Could not change mcast_last_member_interval to 5s"
+
+	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_TO_EXC" -q
+	sleep 1
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and .filter_mode == \"exclude\")" &>/dev/null
+	check_err $? "Wrong *,G entry filter mode"
+
+	brmcast_check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}"
+
+	brmcast_check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 1 "${Y[@]}"
+
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and
+				.source_list[].address == \"192.0.2.2\")" &>/dev/null
+	check_fail $? "Wrong *,G entry source list, 192.0.2.2 entry still exists"
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and
+				.source_list[].address == \"192.0.2.21\")" &>/dev/null
+	check_fail $? "Wrong *,G entry source list, 192.0.2.21 entry still exists"
+
+	brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100
+	brmcast_check_sg_fwding 0 "${Y[@]}"
+
+	log_test "IGMPv3 report $TEST_GROUP include -> to_exclude"
+
+	ip link set dev br0 type bridge mcast_last_member_interval 100
+
+	v3cleanup $swp1 $TEST_GROUP
+}
+
+v3exc_allow_test()
+{
+	RET=0
+	local X=("192.0.2.1" "192.0.2.2" "192.0.2.20" "192.0.2.30")
+	local Y=("192.0.2.21")
+
+	v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP
+
+	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_ALLOW2" -q
+	sleep 1
+	brmcast_check_sg_entries "allow" "${X[@]}" "${Y[@]}"
+
+	brmcast_check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 1 "${Y[@]}"
+
+	brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100
+	brmcast_check_sg_fwding 0 "${Y[@]}"
+
+	log_test "IGMPv3 report $TEST_GROUP exclude -> allow"
+
+	v3cleanup $swp1 $TEST_GROUP
+}
+
+v3exc_is_include_test()
+{
+	RET=0
+	local X=("192.0.2.1" "192.0.2.2" "192.0.2.20" "192.0.2.30")
+	local Y=("192.0.2.21")
+
+	v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP
+
+	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_INC3" -q
+	sleep 1
+	brmcast_check_sg_entries "is_include" "${X[@]}" "${Y[@]}"
+
+	brmcast_check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 1 "${Y[@]}"
+
+	brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100
+	brmcast_check_sg_fwding 0 "${Y[@]}"
+
+	log_test "IGMPv3 report $TEST_GROUP exclude -> is_include"
+
+	v3cleanup $swp1 $TEST_GROUP
+}
+
+v3exc_is_exclude_test()
+{
+	RET=0
+	local X=("192.0.2.30")
+	local Y=("192.0.2.20")
+
+	v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP
+
+	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_EXC2" -q
+	sleep 1
+	brmcast_check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}"
+
+	brmcast_check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 1 "${Y[@]}"
+
+	brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100
+	brmcast_check_sg_fwding 0 "${Y[@]}"
+
+	log_test "IGMPv3 report $TEST_GROUP exclude -> is_exclude"
+
+	v3cleanup $swp1 $TEST_GROUP
+}
+
+v3exc_to_exclude_test()
+{
+	RET=0
+	local X=("192.0.2.1" "192.0.2.30")
+	local Y=("192.0.2.20")
+
+	v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP
+
+	ip link set dev br0 type bridge mcast_last_member_interval 500
+	check_err $? "Could not change mcast_last_member_interval to 5s"
+
+	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_TO_EXC" -q
+	sleep 1
+	brmcast_check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}"
+
+	brmcast_check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 1 "${Y[@]}"
+
+	brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100
+	brmcast_check_sg_fwding 0 "${Y[@]}"
+
+	log_test "IGMPv3 report $TEST_GROUP exclude -> to_exclude"
+
+	ip link set dev br0 type bridge mcast_last_member_interval 100
+
+	v3cleanup $swp1 $TEST_GROUP
+}
+
+v3inc_block_test()
+{
+	RET=0
+	local X=("192.0.2.2" "192.0.2.3")
+
+	v3include_prepare $h1 $ALL_MAC $ALL_GROUP
+
+	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_BLOCK" -q
+	# make sure the lowered timers have expired (by default 2 seconds)
+	sleep 3
+	brmcast_check_sg_entries "block" "${X[@]}"
+
+	brmcast_check_sg_state 0 "${X[@]}"
+
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and
+				.source_list[].address == \"192.0.2.1\")" &>/dev/null
+	check_fail $? "Wrong *,G entry source list, 192.0.2.1 entry still exists"
+
+	brmcast_check_sg_fwding 1 "${X[@]}"
+	brmcast_check_sg_fwding 0 "192.0.2.100"
+
+	log_test "IGMPv3 report $TEST_GROUP include -> block"
+
+	v3cleanup $swp1 $TEST_GROUP
+}
+
+v3exc_block_test()
+{
+	RET=0
+	local X=("192.0.2.1" "192.0.2.2" "192.0.2.30")
+	local Y=("192.0.2.20" "192.0.2.21")
+
+	v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP
+
+	ip link set dev br0 type bridge mcast_last_member_interval 500
+	check_err $? "Could not change mcast_last_member_interval to 5s"
+
+	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_BLOCK" -q
+	sleep 1
+	brmcast_check_sg_entries "block" "${X[@]}" "${Y[@]}"
+
+	brmcast_check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 1 "${Y[@]}"
+
+	brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100
+	brmcast_check_sg_fwding 0 "${Y[@]}"
+
+	log_test "IGMPv3 report $TEST_GROUP exclude -> block"
+
+	ip link set dev br0 type bridge mcast_last_member_interval 100
+
+	v3cleanup $swp1 $TEST_GROUP
+}
+
+v3exc_timeout_test()
+{
+	RET=0
+	local X=("192.0.2.20" "192.0.2.30")
+
+	# GMI should be 3 seconds
+	ip link set dev br0 type bridge mcast_query_interval 100 \
+					mcast_query_response_interval 100 \
+					mcast_membership_interval 300
+
+	v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP
+	ip link set dev br0 type bridge mcast_query_interval 500 \
+					mcast_query_response_interval 500 \
+					mcast_membership_interval 1500
+
+	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_ALLOW2" -q
+	sleep 3
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and .filter_mode == \"include\")" &>/dev/null
+	check_err $? "Wrong *,G entry filter mode"
+
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and
+				.source_list[].address == \"192.0.2.1\")" &>/dev/null
+	check_fail $? "Wrong *,G entry source list, 192.0.2.1 entry still exists"
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and
+				.source_list[].address == \"192.0.2.2\")" &>/dev/null
+	check_fail $? "Wrong *,G entry source list, 192.0.2.2 entry still exists"
+
+	brmcast_check_sg_entries "allow" "${X[@]}"
+
+	brmcast_check_sg_state 0 "${X[@]}"
+
+	brmcast_check_sg_fwding 1 "${X[@]}"
+	brmcast_check_sg_fwding 0 192.0.2.100
+
+	log_test "IGMPv3 group $TEST_GROUP exclude timeout"
+
+	ip link set dev br0 type bridge mcast_query_interval 12500 \
+					mcast_query_response_interval 1000 \
+					mcast_membership_interval 26000
+
+	v3cleanup $swp1 $TEST_GROUP
+}
+
+v3star_ex_auto_add_test()
+{
+	RET=0
+
+	v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP
+
+	$MZ $h2 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_INC" -q
+	sleep 1
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and .src == \"192.0.2.3\" and \
+				.port == \"$swp1\")" &>/dev/null
+	check_err $? "S,G entry for *,G port doesn't exist"
+
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and .src == \"192.0.2.3\" and \
+				.port == \"$swp1\" and \
+				.flags[] == \"added_by_star_ex\")" &>/dev/null
+	check_err $? "Auto-added S,G entry doesn't have added_by_star_ex flag"
+
+	brmcast_check_sg_fwding 1 192.0.2.3
+
+	log_test "IGMPv3 S,G port entry automatic add to a *,G port"
+
+	v3cleanup $swp1 $TEST_GROUP
+	v3cleanup $swp2 $TEST_GROUP
 }
 
 trap cleanup EXIT
diff --git a/tools/testing/selftests/net/forwarding/bridge_locked_port.sh b/tools/testing/selftests/net/forwarding/bridge_locked_port.sh
new file mode 100755
index 000000000000..c62331b2e006
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_locked_port.sh
@@ -0,0 +1,365 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+	locked_port_ipv4
+	locked_port_ipv6
+	locked_port_vlan
+	locked_port_mab
+	locked_port_mab_roam
+	locked_port_mab_config
+	locked_port_mab_flush
+	locked_port_mab_redirect
+"
+
+NUM_NETIFS=4
+CHECK_TC="no"
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+	vlan_create $h1 100 v$h1 198.51.100.1/24
+}
+
+h1_destroy()
+{
+	vlan_destroy $h1 100
+	simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64
+	vlan_create $h2 100 v$h2 198.51.100.2/24
+}
+
+h2_destroy()
+{
+	vlan_destroy $h2 100
+	simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+switch_create()
+{
+	ip link add dev br0 type bridge vlan_filtering 1
+
+	ip link set dev $swp1 master br0
+	ip link set dev $swp2 master br0
+
+	bridge link set dev $swp1 learning off
+
+	ip link set dev br0 up
+	ip link set dev $swp1 up
+	ip link set dev $swp2 up
+}
+
+switch_destroy()
+{
+	ip link set dev $swp2 down
+	ip link set dev $swp1 down
+
+	ip link del dev br0
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+locked_port_ipv4()
+{
+	RET=0
+
+	check_locked_port_support || return 0
+
+	ping_do $h1 192.0.2.2
+	check_err $? "Ping did not work before locking port"
+
+	bridge link set dev $swp1 locked on
+
+	ping_do $h1 192.0.2.2
+	check_fail $? "Ping worked after locking port, but before adding FDB entry"
+
+	bridge fdb add `mac_get $h1` dev $swp1 master static
+
+	ping_do $h1 192.0.2.2
+	check_err $? "Ping did not work after locking port and adding FDB entry"
+
+	bridge link set dev $swp1 locked off
+	bridge fdb del `mac_get $h1` dev $swp1 master static
+
+	ping_do $h1 192.0.2.2
+	check_err $? "Ping did not work after unlocking port and removing FDB entry."
+
+	log_test "Locked port ipv4"
+}
+
+locked_port_vlan()
+{
+	RET=0
+
+	check_locked_port_support || return 0
+
+	bridge vlan add vid 100 dev $swp1
+	bridge vlan add vid 100 dev $swp2
+
+	ping_do $h1.100 198.51.100.2
+	check_err $? "Ping through vlan did not work before locking port"
+
+	bridge link set dev $swp1 locked on
+	ping_do $h1.100 198.51.100.2
+	check_fail $? "Ping through vlan worked after locking port, but before adding FDB entry"
+
+	bridge fdb add `mac_get $h1` dev $swp1 vlan 100 master static
+
+	ping_do $h1.100 198.51.100.2
+	check_err $? "Ping through vlan did not work after locking port and adding FDB entry"
+
+	bridge link set dev $swp1 locked off
+	bridge fdb del `mac_get $h1` dev $swp1 vlan 100 master static
+
+	ping_do $h1.100 198.51.100.2
+	check_err $? "Ping through vlan did not work after unlocking port and removing FDB entry"
+
+	bridge vlan del vid 100 dev $swp1
+	bridge vlan del vid 100 dev $swp2
+	log_test "Locked port vlan"
+}
+
+locked_port_ipv6()
+{
+	RET=0
+	check_locked_port_support || return 0
+
+	ping6_do $h1 2001:db8:1::2
+	check_err $? "Ping6 did not work before locking port"
+
+	bridge link set dev $swp1 locked on
+
+	ping6_do $h1 2001:db8:1::2
+	check_fail $? "Ping6 worked after locking port, but before adding FDB entry"
+
+	bridge fdb add `mac_get $h1` dev $swp1 master static
+	ping6_do $h1 2001:db8:1::2
+	check_err $? "Ping6 did not work after locking port and adding FDB entry"
+
+	bridge link set dev $swp1 locked off
+	bridge fdb del `mac_get $h1` dev $swp1 master static
+
+	ping6_do $h1 2001:db8:1::2
+	check_err $? "Ping6 did not work after unlocking port and removing FDB entry"
+
+	log_test "Locked port ipv6"
+}
+
+locked_port_mab()
+{
+	RET=0
+	check_port_mab_support || return 0
+
+	ping_do $h1 192.0.2.2
+	check_err $? "Ping did not work before locking port"
+
+	bridge link set dev $swp1 learning on locked on
+
+	ping_do $h1 192.0.2.2
+	check_fail $? "Ping worked on a locked port without an FDB entry"
+
+	bridge fdb get `mac_get $h1` br br0 vlan 1 &> /dev/null
+	check_fail $? "FDB entry created before enabling MAB"
+
+	bridge link set dev $swp1 learning on locked on mab on
+
+	ping_do $h1 192.0.2.2
+	check_fail $? "Ping worked on MAB enabled port without an FDB entry"
+
+	bridge fdb get `mac_get $h1` br br0 vlan 1 | grep "dev $swp1" | grep -q "locked"
+	check_err $? "Locked FDB entry not created"
+
+	bridge fdb replace `mac_get $h1` dev $swp1 master static
+
+	ping_do $h1 192.0.2.2
+	check_err $? "Ping did not work after replacing FDB entry"
+
+	bridge fdb get `mac_get $h1` br br0 vlan 1 | grep "dev $swp1" | grep -q "locked"
+	check_fail $? "FDB entry marked as locked after replacement"
+
+	bridge fdb del `mac_get $h1` dev $swp1 master
+	bridge link set dev $swp1 learning off locked off mab off
+
+	log_test "Locked port MAB"
+}
+
+# Check that entries cannot roam to a locked port, but that entries can roam
+# to an unlocked port.
+locked_port_mab_roam()
+{
+	local mac=a0:b0:c0:c0:b0:a0
+
+	RET=0
+	check_port_mab_support || return 0
+
+	bridge link set dev $swp1 learning on locked on mab on
+
+	$MZ $h1 -q -c 5 -d 100msec -t udp -a $mac -b rand
+	bridge fdb get $mac br br0 vlan 1 | grep "dev $swp1" | grep -q "locked"
+	check_err $? "No locked entry on first injection"
+
+	$MZ $h2 -q -c 5 -d 100msec -t udp -a $mac -b rand
+	bridge fdb get $mac br br0 vlan 1 | grep -q "dev $swp2"
+	check_err $? "Entry did not roam to an unlocked port"
+
+	bridge fdb get $mac br br0 vlan 1 | grep -q "locked"
+	check_fail $? "Entry roamed with locked flag on"
+
+	$MZ $h1 -q -c 5 -d 100msec -t udp -a $mac -b rand
+	bridge fdb get $mac br br0 vlan 1 | grep -q "dev $swp1"
+	check_fail $? "Entry roamed back to locked port"
+
+	bridge fdb del $mac vlan 1 dev $swp2 master
+	bridge link set dev $swp1 learning off locked off mab off
+
+	log_test "Locked port MAB roam"
+}
+
+# Check that MAB can only be enabled on a port that is both locked and has
+# learning enabled.
+locked_port_mab_config()
+{
+	RET=0
+	check_port_mab_support || return 0
+
+	bridge link set dev $swp1 learning on locked off mab on &> /dev/null
+	check_fail $? "MAB enabled while port is unlocked"
+
+	bridge link set dev $swp1 learning off locked on mab on &> /dev/null
+	check_fail $? "MAB enabled while port has learning disabled"
+
+	bridge link set dev $swp1 learning on locked on mab on
+	check_err $? "Failed to enable MAB when port is locked and has learning enabled"
+
+	bridge link set dev $swp1 learning off locked off mab off
+
+	log_test "Locked port MAB configuration"
+}
+
+# Check that locked FDB entries are flushed from a port when MAB is disabled.
+locked_port_mab_flush()
+{
+	local locked_mac1=00:01:02:03:04:05
+	local unlocked_mac1=00:01:02:03:04:06
+	local locked_mac2=00:01:02:03:04:07
+	local unlocked_mac2=00:01:02:03:04:08
+
+	RET=0
+	check_port_mab_support || return 0
+
+	bridge link set dev $swp1 learning on locked on mab on
+	bridge link set dev $swp2 learning on locked on mab on
+
+	# Create regular and locked FDB entries on each port.
+	bridge fdb add $unlocked_mac1 dev $swp1 vlan 1 master static
+	bridge fdb add $unlocked_mac2 dev $swp2 vlan 1 master static
+
+	$MZ $h1 -q -c 5 -d 100msec -t udp -a $locked_mac1 -b rand
+	bridge fdb get $locked_mac1 br br0 vlan 1 | grep "dev $swp1" | \
+		grep -q "locked"
+	check_err $? "Failed to create locked FDB entry on first port"
+
+	$MZ $h2 -q -c 5 -d 100msec -t udp -a $locked_mac2 -b rand
+	bridge fdb get $locked_mac2 br br0 vlan 1 | grep "dev $swp2" | \
+		grep -q "locked"
+	check_err $? "Failed to create locked FDB entry on second port"
+
+	# Disable MAB on the first port and check that only the first locked
+	# FDB entry was flushed.
+	bridge link set dev $swp1 mab off
+
+	bridge fdb get $unlocked_mac1 br br0 vlan 1 &> /dev/null
+	check_err $? "Regular FDB entry on first port was flushed after disabling MAB"
+
+	bridge fdb get $unlocked_mac2 br br0 vlan 1 &> /dev/null
+	check_err $? "Regular FDB entry on second port was flushed after disabling MAB"
+
+	bridge fdb get $locked_mac1 br br0 vlan 1 &> /dev/null
+	check_fail $? "Locked FDB entry on first port was not flushed after disabling MAB"
+
+	bridge fdb get $locked_mac2 br br0 vlan 1 &> /dev/null
+	check_err $? "Locked FDB entry on second port was flushed after disabling MAB"
+
+	bridge fdb del $unlocked_mac2 dev $swp2 vlan 1 master static
+	bridge fdb del $unlocked_mac1 dev $swp1 vlan 1 master static
+
+	bridge link set dev $swp2 learning on locked off mab off
+	bridge link set dev $swp1 learning off locked off mab off
+
+	log_test "Locked port MAB FDB flush"
+}
+
+# Check that traffic can be redirected from a locked bridge port and that it
+# does not create locked FDB entries.
+locked_port_mab_redirect()
+{
+	RET=0
+	check_port_mab_support || return 0
+
+	tc qdisc add dev $swp1 clsact
+	tc filter add dev $swp1 ingress protocol all pref 1 handle 101 flower \
+		action mirred egress redirect dev $swp2
+	bridge link set dev $swp1 learning on locked on mab on
+
+	ping_do $h1 192.0.2.2
+	check_err $? "Ping did not work with redirection"
+
+	bridge fdb get `mac_get $h1` br br0 vlan 1 2> /dev/null | \
+		grep "dev $swp1" | grep -q "locked"
+	check_fail $? "Locked entry created for redirected traffic"
+
+	tc filter del dev $swp1 ingress protocol all pref 1 handle 101 flower
+
+	ping_do $h1 192.0.2.2
+	check_fail $? "Ping worked without redirection"
+
+	bridge fdb get `mac_get $h1` br br0 vlan 1 2> /dev/null | \
+		grep "dev $swp1" | grep -q "locked"
+	check_err $? "Locked entry not created after deleting filter"
+
+	bridge fdb del `mac_get $h1` vlan 1 dev $swp1 master
+	bridge link set dev $swp1 learning off locked off mab off
+	tc qdisc del dev $swp1 clsact
+
+	log_test "Locked port MAB redirect"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
new file mode 100755
index 000000000000..d9d587454d20
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
@@ -0,0 +1,1371 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------+                          +------------------------+
+# | H1 (vrf)              |                          | H2 (vrf)               |
+# | + $h1.10              |                          | + $h2.10               |
+# | | 192.0.2.1/28        |                          | | 192.0.2.2/28         |
+# | | 2001:db8:1::1/64    |                          | | 2001:db8:1::2/64     |
+# | |                     |                          | |                      |
+# | |  + $h1.20           |                          | |  + $h2.20            |
+# | \  | 198.51.100.1/24  |                          | \  | 198.51.100.2/24   |
+# |  \ | 2001:db8:2::1/64 |                          |  \ | 2001:db8:2::2/64  |
+# |   \|                  |                          |   \|                   |
+# |    + $h1              |                          |    + $h2               |
+# +----|------------------+                          +----|-------------------+
+#      |                                                  |
+# +----|--------------------------------------------------|-------------------+
+# | SW |                                                  |                   |
+# | +--|--------------------------------------------------|-----------------+ |
+# | |  + $swp1                   BR0 (802.1q)             + $swp2           | |
+# | |     vid 10                                             vid 10         | |
+# | |     vid 20                                             vid 20         | |
+# | |                                                                       | |
+# | +-----------------------------------------------------------------------+ |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+	cfg_test
+	fwd_test
+	ctrl_test
+"
+
+NUM_NETIFS=4
+source lib.sh
+source tc_common.sh
+
+h1_create()
+{
+	simple_if_init $h1
+	vlan_create $h1 10 v$h1 192.0.2.1/28 2001:db8:1::1/64
+	vlan_create $h1 20 v$h1 198.51.100.1/24 2001:db8:2::1/64
+}
+
+h1_destroy()
+{
+	vlan_destroy $h1 20
+	vlan_destroy $h1 10
+	simple_if_fini $h1
+}
+
+h2_create()
+{
+	simple_if_init $h2
+	vlan_create $h2 10 v$h2 192.0.2.2/28
+	vlan_create $h2 20 v$h2 198.51.100.2/24
+}
+
+h2_destroy()
+{
+	vlan_destroy $h2 20
+	vlan_destroy $h2 10
+	simple_if_fini $h2
+}
+
+switch_create()
+{
+	ip link add name br0 type bridge vlan_filtering 1 vlan_default_pvid 0 \
+		mcast_snooping 1 mcast_igmp_version 3 mcast_mld_version 2
+	bridge vlan add vid 10 dev br0 self
+	bridge vlan add vid 20 dev br0 self
+	ip link set dev br0 up
+
+	ip link set dev $swp1 master br0
+	ip link set dev $swp1 up
+	bridge vlan add vid 10 dev $swp1
+	bridge vlan add vid 20 dev $swp1
+
+	ip link set dev $swp2 master br0
+	ip link set dev $swp2 up
+	bridge vlan add vid 10 dev $swp2
+	bridge vlan add vid 20 dev $swp2
+
+	tc qdisc add dev br0 clsact
+	tc qdisc add dev $h2 clsact
+}
+
+switch_destroy()
+{
+	tc qdisc del dev $h2 clsact
+	tc qdisc del dev br0 clsact
+
+	bridge vlan del vid 20 dev $swp2
+	bridge vlan del vid 10 dev $swp2
+	ip link set dev $swp2 down
+	ip link set dev $swp2 nomaster
+
+	bridge vlan del vid 20 dev $swp1
+	bridge vlan del vid 10 dev $swp1
+	ip link set dev $swp1 down
+	ip link set dev $swp1 nomaster
+
+	ip link set dev br0 down
+	bridge vlan del vid 20 dev br0 self
+	bridge vlan del vid 10 dev br0 self
+	ip link del dev br0
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+	forwarding_enable
+
+	h1_create
+	h2_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+	h2_destroy
+	h1_destroy
+
+	forwarding_restore
+	vrf_cleanup
+}
+
+cfg_test_host_common()
+{
+	local name=$1; shift
+	local grp=$1; shift
+	local src=$1; shift
+	local state=$1; shift
+	local invalid_state=$1; shift
+
+	RET=0
+
+	# Check basic add, replace and delete behavior.
+	bridge mdb add dev br0 port br0 grp $grp $state vid 10
+	bridge mdb get dev br0 grp $grp vid 10 &> /dev/null
+	check_err $? "Failed to add $name host entry"
+
+	bridge mdb replace dev br0 port br0 grp $grp $state vid 10 &> /dev/null
+	check_fail $? "Managed to replace $name host entry"
+
+	bridge mdb del dev br0 port br0 grp $grp $state vid 10
+	bridge mdb get dev br0 grp $grp vid 10 &> /dev/null
+	check_fail $? "Failed to delete $name host entry"
+
+	# Check error cases.
+	bridge mdb add dev br0 port br0 grp $grp $invalid_state vid 10 \
+		&> /dev/null
+	check_fail $? "Managed to add $name host entry with a $invalid_state state"
+
+	bridge mdb add dev br0 port br0 grp $grp src $src $state vid 10 \
+		&> /dev/null
+	check_fail $? "Managed to add $name host entry with a source"
+
+	bridge mdb add dev br0 port br0 grp $grp $state vid 10 \
+		filter_mode exclude &> /dev/null
+	check_fail $? "Managed to add $name host entry with a filter mode"
+
+	bridge mdb add dev br0 port br0 grp $grp $state vid 10 \
+		source_list $src &> /dev/null
+	check_fail $? "Managed to add $name host entry with a source list"
+
+	bridge mdb add dev br0 port br0 grp $grp $state vid 10 \
+		proto 123 &> /dev/null
+	check_fail $? "Managed to add $name host entry with a protocol"
+
+	log_test "Common host entries configuration tests ($name)"
+}
+
+# Check configuration of host entries from all types.
+cfg_test_host()
+{
+	echo
+	log_info "# Host entries configuration tests"
+
+	cfg_test_host_common "IPv4" "239.1.1.1" "192.0.2.1" "temp" "permanent"
+	cfg_test_host_common "IPv6" "ff0e::1" "2001:db8:1::1" "temp" "permanent"
+	cfg_test_host_common "L2" "01:02:03:04:05:06" "00:00:00:00:00:01" \
+		"permanent" "temp"
+}
+
+cfg_test_port_common()
+{
+	local name=$1;shift
+	local grp_key=$1; shift
+
+	RET=0
+
+	# Check basic add, replace and delete behavior.
+	bridge mdb add dev br0 port $swp1 $grp_key permanent vid 10
+	bridge mdb get dev br0 $grp_key vid 10 &> /dev/null
+	check_err $? "Failed to add $name entry"
+
+	bridge mdb replace dev br0 port $swp1 $grp_key permanent vid 10 \
+		&> /dev/null
+	check_err $? "Failed to replace $name entry"
+
+	bridge mdb del dev br0 port $swp1 $grp_key permanent vid 10
+	bridge mdb get dev br0 $grp_key vid 10 &> /dev/null
+	check_fail $? "Failed to delete $name entry"
+
+	# Check default protocol and replacement.
+	bridge mdb add dev br0 port $swp1 $grp_key permanent vid 10
+	bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "static"
+	check_err $? "$name entry not added with default \"static\" protocol"
+
+	bridge mdb replace dev br0 port $swp1 $grp_key permanent vid 10 \
+		proto 123
+	bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "123"
+	check_err $? "Failed to replace protocol of $name entry"
+	bridge mdb del dev br0 port $swp1 $grp_key permanent vid 10
+
+	# Check behavior when VLAN is not specified.
+	bridge mdb add dev br0 port $swp1 $grp_key permanent
+	bridge mdb get dev br0 $grp_key vid 10 &> /dev/null
+	check_err $? "$name entry with VLAN 10 not added when VLAN was not specified"
+	bridge mdb get dev br0 $grp_key vid 20 &> /dev/null
+	check_err $? "$name entry with VLAN 20 not added when VLAN was not specified"
+
+	bridge mdb del dev br0 port $swp1 $grp_key permanent
+	bridge mdb get dev br0 $grp_key vid 10 &> /dev/null
+	check_fail $? "$name entry with VLAN 10 not deleted when VLAN was not specified"
+	bridge mdb get dev br0 $grp_key vid 20 &> /dev/null
+	check_fail $? "$name entry with VLAN 20 not deleted when VLAN was not specified"
+
+	# Check behavior when bridge port is down.
+	ip link set dev $swp1 down
+
+	bridge mdb add dev br0 port $swp1 $grp_key permanent vid 10
+	check_err $? "Failed to add $name permanent entry when bridge port is down"
+
+	bridge mdb del dev br0 port $swp1 $grp_key permanent vid 10
+
+	bridge mdb add dev br0 port $swp1 $grp_key temp vid 10 &> /dev/null
+	check_fail $? "Managed to add $name temporary entry when bridge port is down"
+
+	ip link set dev $swp1 up
+	setup_wait_dev $swp1
+
+	# Check error cases.
+	ip link set dev br0 down
+	bridge mdb add dev br0 port $swp1 $grp_key permanent vid 10 \
+		&> /dev/null
+	check_fail $? "Managed to add $name entry when bridge is down"
+	ip link set dev br0 up
+
+	ip link set dev br0 type bridge mcast_snooping 0
+	bridge mdb add dev br0 port $swp1 $grp_key permanent vid \
+		10 &> /dev/null
+	check_fail $? "Managed to add $name entry when multicast snooping is disabled"
+	ip link set dev br0 type bridge mcast_snooping 1
+
+	bridge mdb add dev br0 port $swp1 $grp_key permanent vid 5000 \
+		&> /dev/null
+	check_fail $? "Managed to add $name entry with an invalid VLAN"
+
+	log_test "Common port group entries configuration tests ($name)"
+}
+
+src_list_create()
+{
+	local src_prefix=$1; shift
+	local num_srcs=$1; shift
+	local src_list
+	local i
+
+	for i in $(seq 1 $num_srcs); do
+		src_list=${src_list},${src_prefix}${i}
+	done
+
+	echo $src_list | cut -c 2-
+}
+
+__cfg_test_port_ip_star_g()
+{
+	local name=$1; shift
+	local grp=$1; shift
+	local invalid_grp=$1; shift
+	local src_prefix=$1; shift
+	local src1=${src_prefix}1
+	local src2=${src_prefix}2
+	local src3=${src_prefix}3
+	local max_srcs=31
+	local num_srcs
+
+	RET=0
+
+	bridge mdb add dev br0 port $swp1 grp $grp vid 10
+	bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "exclude"
+	check_err $? "Default filter mode is not \"exclude\""
+	bridge mdb del dev br0 port $swp1 grp $grp vid 10
+
+	# Check basic add and delete behavior.
+	bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode exclude \
+		source_list $src1
+	bridge -d mdb get dev br0 grp $grp vid 10 &> /dev/null
+	check_err $? "(*, G) entry not created"
+	bridge -d mdb get dev br0 grp $grp src $src1 vid 10 &> /dev/null
+	check_err $? "(S, G) entry not created"
+	bridge mdb del dev br0 port $swp1 grp $grp vid 10
+	bridge -d mdb get dev br0 grp $grp vid 10 &> /dev/null
+	check_fail $? "(*, G) entry not deleted"
+	bridge -d mdb get dev br0 grp $grp src $src1 vid 10 &> /dev/null
+	check_fail $? "(S, G) entry not deleted"
+
+	## State (permanent / temp) tests.
+
+	# Check that group and source timer are not set for permanent entries.
+	bridge mdb add dev br0 port $swp1 grp $grp permanent vid 10 \
+		filter_mode exclude source_list $src1
+
+	bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "permanent"
+	check_err $? "(*, G) entry not added as \"permanent\" when should"
+	bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | \
+		grep -q "permanent"
+	check_err $? "(S, G) entry not added as \"permanent\" when should"
+
+	bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q " 0.00"
+	check_err $? "(*, G) \"permanent\" entry has a pending group timer"
+	bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q "/0.00"
+	check_err $? "\"permanent\" source entry has a pending source timer"
+
+	bridge mdb del dev br0 port $swp1 grp $grp vid 10
+
+	# Check that group timer is set for temporary (*, G) EXCLUDE, but not
+	# the source timer.
+	bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \
+		filter_mode exclude source_list $src1
+
+	bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "temp"
+	check_err $? "(*, G) EXCLUDE entry not added as \"temp\" when should"
+	bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "temp"
+	check_err $? "(S, G) \"blocked\" entry not added as \"temp\" when should"
+
+	bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q " 0.00"
+	check_fail $? "(*, G) EXCLUDE entry does not have a pending group timer"
+	bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q "/0.00"
+	check_err $? "\"blocked\" source entry has a pending source timer"
+
+	bridge mdb del dev br0 port $swp1 grp $grp vid 10
+
+	# Check that group timer is not set for temporary (*, G) INCLUDE, but
+	# that the source timer is set.
+	bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \
+		filter_mode include source_list $src1
+
+	bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "temp"
+	check_err $? "(*, G) INCLUDE entry not added as \"temp\" when should"
+	bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "temp"
+	check_err $? "(S, G) entry not added as \"temp\" when should"
+
+	bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q " 0.00"
+	check_err $? "(*, G) INCLUDE entry has a pending group timer"
+	bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q "/0.00"
+	check_fail $? "Source entry does not have a pending source timer"
+
+	bridge mdb del dev br0 port $swp1 grp $grp vid 10
+
+	# Check that group timer is never set for (S, G) entries.
+	bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \
+		filter_mode include source_list $src1
+
+	bridge -d -s mdb get dev br0 grp $grp src $src1 vid 10 | grep -q " 0.00"
+	check_err $? "(S, G) entry has a pending group timer"
+
+	bridge mdb del dev br0 port $swp1 grp $grp vid 10
+
+	## Filter mode (include / exclude) tests.
+
+	# Check that (*, G) INCLUDE entries are added with correct filter mode
+	# and that (S, G) entries are not marked as "blocked".
+	bridge mdb add dev br0 port $swp1 grp $grp vid 10 \
+		filter_mode include source_list $src1
+
+	bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "include"
+	check_err $? "(*, G) INCLUDE not added with \"include\" filter mode"
+	bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "blocked"
+	check_fail $? "(S, G) entry marked as \"blocked\" when should not"
+
+	bridge mdb del dev br0 port $swp1 grp $grp vid 10
+
+	# Check that (*, G) EXCLUDE entries are added with correct filter mode
+	# and that (S, G) entries are marked as "blocked".
+	bridge mdb add dev br0 port $swp1 grp $grp vid 10 \
+		filter_mode exclude source_list $src1
+
+	bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "exclude"
+	check_err $? "(*, G) EXCLUDE not added with \"exclude\" filter mode"
+	bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "blocked"
+	check_err $? "(S, G) entry not marked as \"blocked\" when should"
+
+	bridge mdb del dev br0 port $swp1 grp $grp vid 10
+
+	## Protocol tests.
+
+	# Check that (*, G) and (S, G) entries are added with the specified
+	# protocol.
+	bridge mdb add dev br0 port $swp1 grp $grp vid 10 \
+		filter_mode exclude source_list $src1 proto zebra
+
+	bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "zebra"
+	check_err $? "(*, G) entry not added with \"zebra\" protocol"
+	bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "zebra"
+	check_err $? "(S, G) entry not marked added with \"zebra\" protocol"
+
+	bridge mdb del dev br0 port $swp1 grp $grp vid 10
+
+	## Replace tests.
+
+	# Check that state can be modified.
+	bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \
+		filter_mode exclude source_list $src1
+
+	bridge mdb replace dev br0 port $swp1 grp $grp permanent vid 10 \
+		filter_mode exclude source_list $src1
+	bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "permanent"
+	check_err $? "(*, G) entry not marked as \"permanent\" after replace"
+	bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "permanent"
+	check_err $? "(S, G) entry not marked as \"permanent\" after replace"
+
+	bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \
+		filter_mode exclude source_list $src1
+	bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "temp"
+	check_err $? "(*, G) entry not marked as \"temp\" after replace"
+	bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "temp"
+	check_err $? "(S, G) entry not marked as \"temp\" after replace"
+
+	bridge mdb del dev br0 port $swp1 grp $grp vid 10
+
+	# Check that filter mode can be modified.
+	bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \
+		filter_mode exclude source_list $src1
+
+	bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \
+		filter_mode include source_list $src1
+	bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "include"
+	check_err $? "(*, G) not marked with \"include\" filter mode after replace"
+	bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "blocked"
+	check_fail $? "(S, G) marked as \"blocked\" after replace"
+
+	bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \
+		filter_mode exclude source_list $src1
+	bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "exclude"
+	check_err $? "(*, G) not marked with \"exclude\" filter mode after replace"
+	bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "blocked"
+	check_err $? "(S, G) not marked as \"blocked\" after replace"
+
+	bridge mdb del dev br0 port $swp1 grp $grp vid 10
+
+	# Check that sources can be added to and removed from the source list.
+	bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \
+		filter_mode exclude source_list $src1
+
+	bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \
+		filter_mode exclude source_list $src1,$src2,$src3
+	bridge -d mdb get dev br0 grp $grp src $src1 vid 10 &> /dev/null
+	check_err $? "(S, G) entry for source $src1 not created after replace"
+	bridge -d mdb get dev br0 grp $grp src $src2 vid 10 &> /dev/null
+	check_err $? "(S, G) entry for source $src2 not created after replace"
+	bridge -d mdb get dev br0 grp $grp src $src3 vid 10 &> /dev/null
+	check_err $? "(S, G) entry for source $src3 not created after replace"
+
+	bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \
+		filter_mode exclude source_list $src1,$src3
+	bridge -d mdb get dev br0 grp $grp src $src1 vid 10 &> /dev/null
+	check_err $? "(S, G) entry for source $src1 not created after second replace"
+	bridge -d mdb get dev br0 grp $grp src $src2 vid 10 &> /dev/null
+	check_fail $? "(S, G) entry for source $src2 created after second replace"
+	bridge -d mdb get dev br0 grp $grp src $src3 vid 10 &> /dev/null
+	check_err $? "(S, G) entry for source $src3 not created after second replace"
+
+	bridge mdb del dev br0 port $swp1 grp $grp vid 10
+
+	# Check that protocol can be modified.
+	bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \
+		filter_mode exclude source_list $src1 proto zebra
+
+	bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \
+		filter_mode exclude source_list $src1 proto bgp
+	bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "bgp"
+	check_err $? "(*, G) protocol not changed to \"bgp\" after replace"
+	bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "bgp"
+	check_err $? "(S, G) protocol not changed to \"bgp\" after replace"
+
+	bridge mdb del dev br0 port $swp1 grp $grp vid 10
+
+	## Star exclude tests.
+
+	# Check star exclude functionality. When adding a new EXCLUDE (*, G),
+	# it needs to be also added to all (S, G) entries for proper
+	# replication.
+	bridge mdb add dev br0 port $swp2 grp $grp vid 10 \
+		filter_mode include source_list $src1
+	bridge mdb add dev br0 port $swp1 grp $grp vid 10
+	bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep "$swp1" | \
+		grep -q "added_by_star_ex"
+	check_err $? "\"added_by_star_ex\" entry not created after adding (*, G) entry"
+	bridge mdb del dev br0 port $swp1 grp $grp vid 10
+	bridge mdb del dev br0 port $swp2 grp $grp src $src1 vid 10
+
+	## Error cases tests.
+
+	bridge mdb add dev br0 port $swp1 grp $invalid_grp vid 10 &> /dev/null
+	check_fail $? "Managed to add an entry with an invalid group"
+
+	bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode include \
+		&> /dev/null
+	check_fail $? "Managed to add an INCLUDE entry with an empty source list"
+
+	bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode include \
+		source_list $grp &> /dev/null
+	check_fail $? "Managed to add an entry with an invalid source in source list"
+
+	bridge mdb add dev br0 port $swp1 grp $grp vid 10 \
+		source_list $src &> /dev/null
+	check_fail $? "Managed to add an entry with a source list and no filter mode"
+
+	bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode include \
+		source_list $src1
+	bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode exclude \
+		source_list $src1 &> /dev/null
+	check_fail $? "Managed to replace an entry without using replace"
+	bridge mdb del dev br0 port $swp1 grp $grp vid 10
+
+	bridge mdb add dev br0 port $swp1 grp $grp src $src2 vid 10
+	bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode include \
+		source_list $src1,$src2,$src3 &> /dev/null
+	check_fail $? "Managed to add a source that already has a forwarding entry"
+	bridge mdb del dev br0 port $swp1 grp $grp src $src2 vid 10
+
+	# Check maximum number of sources.
+	bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode exclude \
+		source_list $(src_list_create $src_prefix $max_srcs)
+	num_srcs=$(bridge -d mdb show dev br0 vid 10 | grep "$grp" | \
+		grep "src" | wc -l)
+	[[ $num_srcs -eq $max_srcs ]]
+	check_err $? "Failed to configure maximum number of sources ($max_srcs)"
+	bridge mdb del dev br0 port $swp1 grp $grp vid 10
+
+	bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode exclude \
+		source_list $(src_list_create $src_prefix $((max_srcs + 1))) \
+		&> /dev/null
+	check_fail $? "Managed to exceed maximum number of sources ($max_srcs)"
+
+	log_test "$name (*, G) port group entries configuration tests"
+}
+
+cfg_test_port_ip_star_g()
+{
+	echo
+	log_info "# Port group entries configuration tests - (*, G)"
+
+	cfg_test_port_common "IPv4 (*, G)" "grp 239.1.1.1"
+	cfg_test_port_common "IPv6 (*, G)" "grp ff0e::1"
+	__cfg_test_port_ip_star_g "IPv4" "239.1.1.1" "224.0.0.1" "192.0.2."
+	__cfg_test_port_ip_star_g "IPv6" "ff0e::1" "ff02::1" "2001:db8:1::"
+}
+
+__cfg_test_port_ip_sg()
+{
+	local name=$1; shift
+	local grp=$1; shift
+	local src=$1; shift
+	local grp_key="grp $grp src $src"
+
+	RET=0
+
+	bridge mdb add dev br0 port $swp1 $grp_key vid 10
+	bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "include"
+	check_err $? "Default filter mode is not \"include\""
+	bridge mdb del dev br0 port $swp1 $grp_key vid 10
+
+	# Check that entries can be added as both permanent and temp and that
+	# group timer is set correctly.
+	bridge mdb add dev br0 port $swp1 $grp_key permanent vid 10
+	bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "permanent"
+	check_err $? "Entry not added as \"permanent\" when should"
+	bridge -d -s mdb get dev br0 $grp_key vid 10 | grep -q " 0.00"
+	check_err $? "\"permanent\" entry has a pending group timer"
+	bridge mdb del dev br0 port $swp1 $grp_key vid 10
+
+	bridge mdb add dev br0 port $swp1 $grp_key temp vid 10
+	bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "temp"
+	check_err $? "Entry not added as \"temp\" when should"
+	bridge -d -s mdb get dev br0 $grp_key vid 10 | grep -q " 0.00"
+	check_fail $? "\"temp\" entry has an unpending group timer"
+	bridge mdb del dev br0 port $swp1 $grp_key vid 10
+
+	# Check error cases.
+	bridge mdb add dev br0 port $swp1 $grp_key vid 10 \
+		filter_mode include &> /dev/null
+	check_fail $? "Managed to add an entry with a filter mode"
+
+	bridge mdb add dev br0 port $swp1 $grp_key vid 10 \
+		filter_mode include source_list $src &> /dev/null
+	check_fail $? "Managed to add an entry with a source list"
+
+	bridge mdb add dev br0 port $swp1 grp $grp src $grp vid 10 &> /dev/null
+	check_fail $? "Managed to add an entry with an invalid source"
+
+	bridge mdb add dev br0 port $swp1 $grp_key vid 10 temp
+	bridge mdb add dev br0 port $swp1 $grp_key vid 10 permanent &> /dev/null
+	check_fail $? "Managed to replace an entry without using replace"
+	bridge mdb del dev br0 port $swp1 $grp_key vid 10
+
+	# Check that we can replace available attributes.
+	bridge mdb add dev br0 port $swp1 $grp_key vid 10 proto 123
+	bridge mdb replace dev br0 port $swp1 $grp_key vid 10 proto 111
+	bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "111"
+	check_err $? "Failed to replace protocol"
+
+	bridge mdb replace dev br0 port $swp1 $grp_key vid 10 permanent
+	bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "permanent"
+	check_err $? "Entry not marked as \"permanent\" after replace"
+	bridge -d -s mdb get dev br0 $grp_key vid 10 | grep -q " 0.00"
+	check_err $? "Entry has a pending group timer after replace"
+
+	bridge mdb replace dev br0 port $swp1 $grp_key vid 10 temp
+	bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "temp"
+	check_err $? "Entry not marked as \"temp\" after replace"
+	bridge -d -s mdb get dev br0 $grp_key vid 10 | grep -q " 0.00"
+	check_fail $? "Entry has an unpending group timer after replace"
+	bridge mdb del dev br0 port $swp1 $grp_key vid 10
+
+	# Check star exclude functionality. When adding a (S, G), all matching
+	# (*, G) ports need to be added to it.
+	bridge mdb add dev br0 port $swp2 grp $grp vid 10
+	bridge mdb add dev br0 port $swp1 $grp_key vid 10
+	bridge mdb get dev br0 $grp_key vid 10 | grep $swp2 | \
+		grep -q "added_by_star_ex"
+	check_err $? "\"added_by_star_ex\" entry not created after adding (S, G) entry"
+	bridge mdb del dev br0 port $swp1 $grp_key vid 10
+	bridge mdb del dev br0 port $swp2 grp $grp vid 10
+
+	log_test "$name (S, G) port group entries configuration tests"
+}
+
+cfg_test_port_ip_sg()
+{
+	echo
+	log_info "# Port group entries configuration tests - (S, G)"
+
+	cfg_test_port_common "IPv4 (S, G)" "grp 239.1.1.1 src 192.0.2.1"
+	cfg_test_port_common "IPv6 (S, G)" "grp ff0e::1 src 2001:db8:1::1"
+	__cfg_test_port_ip_sg "IPv4" "239.1.1.1" "192.0.2.1"
+	__cfg_test_port_ip_sg "IPv6" "ff0e::1" "2001:db8:1::1"
+}
+
+cfg_test_port_ip()
+{
+	cfg_test_port_ip_star_g
+	cfg_test_port_ip_sg
+}
+
+__cfg_test_port_l2()
+{
+	local grp="01:02:03:04:05:06"
+
+	RET=0
+
+	bridge meb add dev br0 port $swp grp 00:01:02:03:04:05 \
+		permanent vid 10 &> /dev/null
+	check_fail $? "Managed to add an entry with unicast MAC"
+
+	bridge mdb add dev br0 port $swp grp $grp src 00:01:02:03:04:05 \
+		permanent vid 10 &> /dev/null
+	check_fail $? "Managed to add an entry with a source"
+
+	bridge mdb add dev br0 port $swp1 grp $grp permanent vid 10 \
+		filter_mode include &> /dev/null
+	check_fail $? "Managed to add an entry with a filter mode"
+
+	bridge mdb add dev br0 port $swp1 grp $grp permanent vid 10 \
+		source_list 00:01:02:03:04:05 &> /dev/null
+	check_fail $? "Managed to add an entry with a source list"
+
+	log_test "L2 (*, G) port group entries configuration tests"
+}
+
+cfg_test_port_l2()
+{
+	echo
+	log_info "# Port group entries configuration tests - L2"
+
+	cfg_test_port_common "L2 (*, G)" "grp 01:02:03:04:05:06"
+	__cfg_test_port_l2
+}
+
+# Check configuration of regular (port) entries of all types.
+cfg_test_port()
+{
+	cfg_test_port_ip
+	cfg_test_port_l2
+}
+
+ipv4_grps_get()
+{
+	local max_grps=$1; shift
+	local i
+
+	for i in $(seq 0 $((max_grps - 1))); do
+		echo "239.1.1.$i"
+	done
+}
+
+ipv6_grps_get()
+{
+	local max_grps=$1; shift
+	local i
+
+	for i in $(seq 0 $((max_grps - 1))); do
+		echo "ff0e::$(printf %x $i)"
+	done
+}
+
+l2_grps_get()
+{
+	local max_grps=$1; shift
+	local i
+
+	for i in $(seq 0 $((max_grps - 1))); do
+		echo "01:00:00:00:00:$(printf %02x $i)"
+	done
+}
+
+cfg_test_dump_common()
+{
+	local name=$1; shift
+	local fn=$1; shift
+	local max_bridges=2
+	local max_grps=256
+	local max_ports=32
+	local num_entries
+	local batch_file
+	local grp
+	local i j
+
+	RET=0
+
+	# Create net devices.
+	for i in $(seq 1 $max_bridges); do
+		ip link add name br-test${i} up type bridge vlan_filtering 1 \
+			mcast_snooping 1
+		for j in $(seq 1 $max_ports); do
+			ip link add name br-test${i}-du${j} up \
+				master br-test${i} type dummy
+		done
+	done
+
+	# Create batch file with MDB entries.
+	batch_file=$(mktemp)
+	for i in $(seq 1 $max_bridges); do
+		for j in $(seq 1 $max_ports); do
+			for grp in $($fn $max_grps); do
+				echo "mdb add dev br-test${i} \
+					port br-test${i}-du${j} grp $grp \
+					permanent vid 1" >> $batch_file
+			done
+		done
+	done
+
+	# Program the batch file and check for expected number of entries.
+	bridge -b $batch_file
+	for i in $(seq 1 $max_bridges); do
+		num_entries=$(bridge mdb show dev br-test${i} | \
+			grep "permanent" | wc -l)
+		[[ $num_entries -eq $((max_grps * max_ports)) ]]
+		check_err $? "Wrong number of entries in br-test${i}"
+	done
+
+	# Cleanup.
+	rm $batch_file
+	for i in $(seq 1 $max_bridges); do
+		ip link del dev br-test${i}
+		for j in $(seq $max_ports); do
+			ip link del dev br-test${i}-du${j}
+		done
+	done
+
+	log_test "$name large scale dump tests"
+}
+
+# Check large scale dump.
+cfg_test_dump()
+{
+	echo
+	log_info "# Large scale dump tests"
+
+	cfg_test_dump_common "IPv4" ipv4_grps_get
+	cfg_test_dump_common "IPv6" ipv6_grps_get
+	cfg_test_dump_common "L2" l2_grps_get
+}
+
+# Check flush functionality with different parameters.
+cfg_test_flush()
+{
+	local num_entries
+
+	# Add entries with different attributes and check that they are all
+	# flushed when the flush command is given with no parameters.
+
+	# Different port.
+	bridge mdb add dev br0 port $swp1 grp 239.1.1.1 vid 10
+	bridge mdb add dev br0 port $swp2 grp 239.1.1.2 vid 10
+
+	# Different VLAN ID.
+	bridge mdb add dev br0 port $swp1 grp 239.1.1.3 vid 10
+	bridge mdb add dev br0 port $swp1 grp 239.1.1.4 vid 20
+
+	# Different routing protocol.
+	bridge mdb add dev br0 port $swp1 grp 239.1.1.5 vid 10 proto bgp
+	bridge mdb add dev br0 port $swp1 grp 239.1.1.6 vid 10 proto zebra
+
+	# Different state.
+	bridge mdb add dev br0 port $swp1 grp 239.1.1.7 vid 10 permanent
+	bridge mdb add dev br0 port $swp1 grp 239.1.1.8 vid 10 temp
+
+	bridge mdb flush dev br0
+	num_entries=$(bridge mdb show dev br0 | wc -l)
+	[[ $num_entries -eq 0 ]]
+	check_err $? 0 "Not all entries flushed after flush all"
+
+	# Check that when flushing by port only entries programmed with the
+	# specified port are flushed and the rest are not.
+
+	bridge mdb add dev br0 port $swp1 grp 239.1.1.1 vid 10
+	bridge mdb add dev br0 port $swp2 grp 239.1.1.1 vid 10
+	bridge mdb add dev br0 port br0 grp 239.1.1.1 vid 10
+
+	bridge mdb flush dev br0 port $swp1
+
+	bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp1"
+	check_fail $? "Entry not flushed by specified port"
+	bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp2"
+	check_err $? "Entry flushed by wrong port"
+	bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port br0"
+	check_err $? "Host entry flushed by wrong port"
+
+	bridge mdb flush dev br0 port br0
+
+	bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port br0"
+	check_fail $? "Host entry not flushed by specified port"
+
+	bridge mdb flush dev br0
+
+	# Check that when flushing by VLAN ID only entries programmed with the
+	# specified VLAN ID are flushed and the rest are not.
+
+	bridge mdb add dev br0 port $swp1 grp 239.1.1.1 vid 10
+	bridge mdb add dev br0 port $swp2 grp 239.1.1.1 vid 10
+	bridge mdb add dev br0 port $swp1 grp 239.1.1.1 vid 20
+	bridge mdb add dev br0 port $swp2 grp 239.1.1.1 vid 20
+
+	bridge mdb flush dev br0 vid 10
+
+	bridge mdb get dev br0 grp 239.1.1.1 vid 10 &> /dev/null
+	check_fail $? "Entry not flushed by specified VLAN ID"
+	bridge mdb get dev br0 grp 239.1.1.1 vid 20 &> /dev/null
+	check_err $? "Entry flushed by wrong VLAN ID"
+
+	bridge mdb flush dev br0
+
+	# Check that all permanent entries are flushed when "permanent" is
+	# specified and that temporary entries are not.
+
+	bridge mdb add dev br0 port $swp1 grp 239.1.1.1 permanent vid 10
+	bridge mdb add dev br0 port $swp2 grp 239.1.1.1 temp vid 10
+
+	bridge mdb flush dev br0 permanent
+
+	bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp1"
+	check_fail $? "Entry not flushed by \"permanent\" state"
+	bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp2"
+	check_err $? "Entry flushed by wrong state (\"permanent\")"
+
+	bridge mdb flush dev br0
+
+	# Check that all temporary entries are flushed when "nopermanent" is
+	# specified and that permanent entries are not.
+
+	bridge mdb add dev br0 port $swp1 grp 239.1.1.1 permanent vid 10
+	bridge mdb add dev br0 port $swp2 grp 239.1.1.1 temp vid 10
+
+	bridge mdb flush dev br0 nopermanent
+
+	bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp1"
+	check_err $? "Entry flushed by wrong state (\"nopermanent\")"
+	bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp2"
+	check_fail $? "Entry not flushed by \"nopermanent\" state"
+
+	bridge mdb flush dev br0
+
+	# Check that L2 host entries are not flushed when "nopermanent" is
+	# specified, but flushed when "permanent" is specified.
+
+	bridge mdb add dev br0 port br0 grp 01:02:03:04:05:06 permanent vid 10
+
+	bridge mdb flush dev br0 nopermanent
+
+	bridge mdb get dev br0 grp 01:02:03:04:05:06 vid 10 &> /dev/null
+	check_err $? "L2 host entry flushed by wrong state (\"nopermanent\")"
+
+	bridge mdb flush dev br0 permanent
+
+	bridge mdb get dev br0 grp 01:02:03:04:05:06 vid 10 &> /dev/null
+	check_fail $? "L2 host entry not flushed by \"permanent\" state"
+
+	bridge mdb flush dev br0
+
+	# Check that IPv4 host entries are not flushed when "permanent" is
+	# specified, but flushed when "nopermanent" is specified.
+
+	bridge mdb add dev br0 port br0 grp 239.1.1.1 temp vid 10
+
+	bridge mdb flush dev br0 permanent
+
+	bridge mdb get dev br0 grp 239.1.1.1 vid 10 &> /dev/null
+	check_err $? "IPv4 host entry flushed by wrong state (\"permanent\")"
+
+	bridge mdb flush dev br0 nopermanent
+
+	bridge mdb get dev br0 grp 239.1.1.1 vid 10 &> /dev/null
+	check_fail $? "IPv4 host entry not flushed by \"nopermanent\" state"
+
+	bridge mdb flush dev br0
+
+	# Check that IPv6 host entries are not flushed when "permanent" is
+	# specified, but flushed when "nopermanent" is specified.
+
+	bridge mdb add dev br0 port br0 grp ff0e::1 temp vid 10
+
+	bridge mdb flush dev br0 permanent
+
+	bridge mdb get dev br0 grp ff0e::1 vid 10 &> /dev/null
+	check_err $? "IPv6 host entry flushed by wrong state (\"permanent\")"
+
+	bridge mdb flush dev br0 nopermanent
+
+	bridge mdb get dev br0 grp ff0e::1 vid 10 &> /dev/null
+	check_fail $? "IPv6 host entry not flushed by \"nopermanent\" state"
+
+	bridge mdb flush dev br0
+
+	# Check that when flushing by routing protocol only entries programmed
+	# with the specified routing protocol are flushed and the rest are not.
+
+	bridge mdb add dev br0 port $swp1 grp 239.1.1.1 vid 10 proto bgp
+	bridge mdb add dev br0 port $swp2 grp 239.1.1.1 vid 10 proto zebra
+	bridge mdb add dev br0 port br0 grp 239.1.1.1 vid 10
+
+	bridge mdb flush dev br0 proto bgp
+
+	bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp1"
+	check_fail $? "Entry not flushed by specified routing protocol"
+	bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp2"
+	check_err $? "Entry flushed by wrong routing protocol"
+	bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port br0"
+	check_err $? "Host entry flushed by wrong routing protocol"
+
+	bridge mdb flush dev br0
+
+	# Test that an error is returned when trying to flush using unsupported
+	# parameters.
+
+	bridge mdb flush dev br0 src_vni 10 &> /dev/null
+	check_fail $? "Managed to flush by source VNI"
+
+	bridge mdb flush dev br0 dst 198.51.100.1 &> /dev/null
+	check_fail $? "Managed to flush by destination IP"
+
+	bridge mdb flush dev br0 dst_port 4789 &> /dev/null
+	check_fail $? "Managed to flush by UDP destination port"
+
+	bridge mdb flush dev br0 vni 10 &> /dev/null
+	check_fail $? "Managed to flush by destination VNI"
+
+	log_test "Flush tests"
+}
+
+cfg_test()
+{
+	cfg_test_host
+	cfg_test_port
+	cfg_test_dump
+	cfg_test_flush
+}
+
+__fwd_test_host_ip()
+{
+	local grp=$1; shift
+	local dmac=$1; shift
+	local src=$1; shift
+	local mode=$1; shift
+	local name
+	local eth_type
+
+	RET=0
+
+	if [[ $mode == "-4" ]]; then
+		name="IPv4"
+		eth_type="ipv4"
+	else
+		name="IPv6"
+		eth_type="ipv6"
+	fi
+
+	tc filter add dev br0 ingress protocol 802.1q pref 1 handle 1 flower \
+		vlan_ethtype $eth_type vlan_id 10 dst_ip $grp src_ip $src \
+		action drop
+
+	# Packet should only be flooded to multicast router ports when there is
+	# no matching MDB entry. The bridge is not configured as a multicast
+	# router port.
+	$MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $src -B $grp -t udp -q
+	tc_check_packets "dev br0 ingress" 1 0
+	check_err $? "Packet locally received after flood"
+
+	# Install a regular port group entry and expect the packet to not be
+	# locally received.
+	bridge mdb add dev br0 port $swp2 grp $grp temp vid 10
+	$MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $src -B $grp -t udp -q
+	tc_check_packets "dev br0 ingress" 1 0
+	check_err $? "Packet locally received after installing a regular entry"
+
+	# Add a host entry and expect the packet to be locally received.
+	bridge mdb add dev br0 port br0 grp $grp temp vid 10
+	$MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $src -B $grp -t udp -q
+	tc_check_packets "dev br0 ingress" 1 1
+	check_err $? "Packet not locally received after adding a host entry"
+
+	# Remove the host entry and expect the packet to not be locally
+	# received.
+	bridge mdb del dev br0 port br0 grp $grp vid 10
+	$MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $src -B $grp -t udp -q
+	tc_check_packets "dev br0 ingress" 1 1
+	check_err $? "Packet locally received after removing a host entry"
+
+	bridge mdb del dev br0 port $swp2 grp $grp vid 10
+
+	tc filter del dev br0 ingress protocol 802.1q pref 1 handle 1 flower
+
+	log_test "$name host entries forwarding tests"
+}
+
+fwd_test_host_ip()
+{
+	__fwd_test_host_ip "239.1.1.1" "01:00:5e:01:01:01" "192.0.2.1" "-4"
+	__fwd_test_host_ip "ff0e::1" "33:33:00:00:00:01" "2001:db8:1::1" "-6"
+}
+
+fwd_test_host_l2()
+{
+	local dmac=01:02:03:04:05:06
+
+	RET=0
+
+	tc filter add dev br0 ingress protocol all pref 1 handle 1 flower \
+		dst_mac $dmac action drop
+
+	# Packet should be flooded and locally received when there is no
+	# matching MDB entry.
+	$MZ $h1.10 -c 1 -p 128 -a own -b $dmac -q
+	tc_check_packets "dev br0 ingress" 1 1
+	check_err $? "Packet not locally received after flood"
+
+	# Install a regular port group entry and expect the packet to not be
+	# locally received.
+	bridge mdb add dev br0 port $swp2 grp $dmac permanent vid 10
+	$MZ $h1.10 -c 1 -p 128 -a own -b $dmac -q
+	tc_check_packets "dev br0 ingress" 1 1
+	check_err $? "Packet locally received after installing a regular entry"
+
+	# Add a host entry and expect the packet to be locally received.
+	bridge mdb add dev br0 port br0 grp $dmac permanent vid 10
+	$MZ $h1.10 -c 1 -p 128 -a own -b $dmac -q
+	tc_check_packets "dev br0 ingress" 1 2
+	check_err $? "Packet not locally received after adding a host entry"
+
+	# Remove the host entry and expect the packet to not be locally
+	# received.
+	bridge mdb del dev br0 port br0 grp $dmac permanent vid 10
+	$MZ $h1.10 -c 1 -p 128 -a own -b $dmac -q
+	tc_check_packets "dev br0 ingress" 1 2
+	check_err $? "Packet locally received after removing a host entry"
+
+	bridge mdb del dev br0 port $swp2 grp $dmac permanent vid 10
+
+	tc filter del dev br0 ingress protocol all pref 1 handle 1 flower
+
+	log_test "L2 host entries forwarding tests"
+}
+
+fwd_test_host()
+{
+	# Disable multicast router on the bridge to ensure that packets are
+	# only locally received when a matching host entry is present.
+	ip link set dev br0 type bridge mcast_router 0
+
+	fwd_test_host_ip
+	fwd_test_host_l2
+
+	ip link set dev br0 type bridge mcast_router 1
+}
+
+__fwd_test_port_ip()
+{
+	local grp=$1; shift
+	local dmac=$1; shift
+	local valid_src=$1; shift
+	local invalid_src=$1; shift
+	local mode=$1; shift
+	local filter_mode=$1; shift
+	local name
+	local eth_type
+	local src_list
+
+	RET=0
+
+	if [[ $mode == "-4" ]]; then
+		name="IPv4"
+		eth_type="ipv4"
+	else
+		name="IPv6"
+		eth_type="ipv6"
+	fi
+
+	# The valid source is the one we expect to get packets from after
+	# adding the entry.
+	if [[ $filter_mode == "include" ]]; then
+		src_list=$valid_src
+	else
+		src_list=$invalid_src
+	fi
+
+	tc filter add dev $h2 ingress protocol 802.1q pref 1 handle 1 flower \
+		vlan_ethtype $eth_type vlan_id 10 dst_ip $grp \
+		src_ip $valid_src action drop
+	tc filter add dev $h2 ingress protocol 802.1q pref 1 handle 2 flower \
+		vlan_ethtype $eth_type vlan_id 10 dst_ip $grp \
+		src_ip $invalid_src action drop
+
+	$MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $valid_src -B $grp -t udp -q
+	tc_check_packets "dev $h2 ingress" 1 0
+	check_err $? "Packet from valid source received on H2 before adding entry"
+
+	$MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
+	tc_check_packets "dev $h2 ingress" 2 0
+	check_err $? "Packet from invalid source received on H2 before adding entry"
+
+	bridge mdb add dev br0 port $swp2 grp $grp vid 10 \
+		filter_mode $filter_mode source_list $src_list
+
+	$MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $valid_src -B $grp -t udp -q
+	tc_check_packets "dev $h2 ingress" 1 1
+	check_err $? "Packet from valid source not received on H2 after adding entry"
+
+	$MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
+	tc_check_packets "dev $h2 ingress" 2 0
+	check_err $? "Packet from invalid source received on H2 after adding entry"
+
+	bridge mdb replace dev br0 port $swp2 grp $grp vid 10 \
+		filter_mode exclude
+
+	$MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $valid_src -B $grp -t udp -q
+	tc_check_packets "dev $h2 ingress" 1 2
+	check_err $? "Packet from valid source not received on H2 after allowing all sources"
+
+	$MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
+	tc_check_packets "dev $h2 ingress" 2 1
+	check_err $? "Packet from invalid source not received on H2 after allowing all sources"
+
+	bridge mdb del dev br0 port $swp2 grp $grp vid 10
+
+	$MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $valid_src -B $grp -t udp -q
+	tc_check_packets "dev $h2 ingress" 1 2
+	check_err $? "Packet from valid source received on H2 after deleting entry"
+
+	$MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
+	tc_check_packets "dev $h2 ingress" 2 1
+	check_err $? "Packet from invalid source received on H2 after deleting entry"
+
+	tc filter del dev $h2 ingress protocol 802.1q pref 1 handle 2 flower
+	tc filter del dev $h2 ingress protocol 802.1q pref 1 handle 1 flower
+
+	log_test "$name port group \"$filter_mode\" entries forwarding tests"
+}
+
+fwd_test_port_ip()
+{
+	__fwd_test_port_ip "239.1.1.1" "01:00:5e:01:01:01" "192.0.2.1" "192.0.2.2" "-4" "exclude"
+	__fwd_test_port_ip "ff0e::1" "33:33:00:00:00:01" "2001:db8:1::1" "2001:db8:1::2" "-6" \
+		"exclude"
+	__fwd_test_port_ip "239.1.1.1" "01:00:5e:01:01:01" "192.0.2.1" "192.0.2.2" "-4" "include"
+	__fwd_test_port_ip "ff0e::1" "33:33:00:00:00:01" "2001:db8:1::1" "2001:db8:1::2" "-6" \
+		"include"
+}
+
+fwd_test_port_l2()
+{
+	local dmac=01:02:03:04:05:06
+
+	RET=0
+
+	tc filter add dev $h2 ingress protocol all pref 1 handle 1 flower \
+		dst_mac $dmac action drop
+
+	$MZ $h1.10 -c 1 -p 128 -a own -b $dmac -q
+	tc_check_packets "dev $h2 ingress" 1 0
+	check_err $? "Packet received on H2 before adding entry"
+
+	bridge mdb add dev br0 port $swp2 grp $dmac permanent vid 10
+	$MZ $h1.10 -c 1 -p 128 -a own -b $dmac -q
+	tc_check_packets "dev $h2 ingress" 1 1
+	check_err $? "Packet not received on H2 after adding entry"
+
+	bridge mdb del dev br0 port $swp2 grp $dmac permanent vid 10
+	$MZ $h1.10 -c 1 -p 128 -a own -b $dmac -q
+	tc_check_packets "dev $h2 ingress" 1 1
+	check_err $? "Packet received on H2 after deleting entry"
+
+	tc filter del dev $h2 ingress protocol all pref 1 handle 1 flower
+
+	log_test "L2 port entries forwarding tests"
+}
+
+fwd_test_port()
+{
+	# Disable multicast flooding to ensure that packets are only forwarded
+	# out of a port when a matching port group entry is present.
+	bridge link set dev $swp2 mcast_flood off
+
+	fwd_test_port_ip
+	fwd_test_port_l2
+
+	bridge link set dev $swp2 mcast_flood on
+}
+
+fwd_test()
+{
+	echo
+	log_info "# Forwarding tests"
+
+	# Set the Max Response Delay to 100 centiseconds (1 second) so that the
+	# bridge will start forwarding according to its MDB soon after a
+	# multicast querier is enabled.
+	ip link set dev br0 type bridge mcast_query_response_interval 100
+
+	# Forwarding according to MDB entries only takes place when the bridge
+	# detects that there is a valid querier in the network. Set the bridge
+	# as the querier and assign it a valid IPv6 link-local address to be
+	# used as the source address for MLD queries.
+	ip -6 address add fe80::1/64 nodad dev br0
+	ip link set dev br0 type bridge mcast_querier 1
+	sleep 10
+
+	fwd_test_host
+	fwd_test_port
+
+	ip link set dev br0 type bridge mcast_querier 0
+	ip -6 address del fe80::1/64 dev br0
+	ip link set dev br0 type bridge mcast_query_response_interval 1000
+}
+
+ctrl_igmpv3_is_in_test()
+{
+	RET=0
+
+	# Add a permanent entry and check that it is not affected by the
+	# received IGMP packet.
+	bridge mdb add dev br0 port $swp1 grp 239.1.1.1 permanent vid 10 \
+		filter_mode include source_list 192.0.2.1
+
+	# IS_IN ( 192.0.2.2 )
+	$MZ $h1.10 -c 1 -a own -b 01:00:5e:01:01:01 -A 192.0.2.1 -B 239.1.1.1 \
+		-t ip proto=2,p=$(igmpv3_is_in_get 239.1.1.1 192.0.2.2) -q
+
+	bridge mdb get dev br0 grp 239.1.1.1 src 192.0.2.2 vid 10 &> /dev/null
+	check_fail $? "Permanent entry affected by IGMP packet"
+
+	# Replace the permanent entry with a temporary one and check that after
+	# processing the IGMP packet, a new source is added to the list along
+	# with a new forwarding entry.
+	bridge mdb replace dev br0 port $swp1 grp 239.1.1.1 temp vid 10 \
+		filter_mode include source_list 192.0.2.1
+
+	# IS_IN ( 192.0.2.2 )
+	$MZ $h1.10 -a own -b 01:00:5e:01:01:01 -c 1 -A 192.0.2.1 -B 239.1.1.1 \
+		-t ip proto=2,p=$(igmpv3_is_in_get 239.1.1.1 192.0.2.2) -q
+
+	bridge -d mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q 192.0.2.2
+	check_err $? "Source not add to source list"
+
+	bridge mdb get dev br0 grp 239.1.1.1 src 192.0.2.2 vid 10 &> /dev/null
+	check_err $? "(S, G) entry not created for new source"
+
+	bridge mdb del dev br0 port $swp1 grp 239.1.1.1 vid 10
+
+	log_test "IGMPv3 MODE_IS_INCLUDE tests"
+}
+
+ctrl_mldv2_is_in_test()
+{
+	RET=0
+
+	# Add a permanent entry and check that it is not affected by the
+	# received MLD packet.
+	bridge mdb add dev br0 port $swp1 grp ff0e::1 permanent vid 10 \
+		filter_mode include source_list 2001:db8:1::1
+
+	# IS_IN ( 2001:db8:1::2 )
+	local p=$(mldv2_is_in_get fe80::1 ff0e::1 2001:db8:1::2)
+	$MZ -6 $h1.10 -a own -b 33:33:00:00:00:01 -c 1 -A fe80::1 -B ff0e::1 \
+		-t ip hop=1,next=0,p="$p" -q
+
+	bridge mdb get dev br0 grp ff0e::1 src 2001:db8:1::2 vid 10 &> /dev/null
+	check_fail $? "Permanent entry affected by MLD packet"
+
+	# Replace the permanent entry with a temporary one and check that after
+	# processing the MLD packet, a new source is added to the list along
+	# with a new forwarding entry.
+	bridge mdb replace dev br0 port $swp1 grp ff0e::1 temp vid 10 \
+		filter_mode include source_list 2001:db8:1::1
+
+	# IS_IN ( 2001:db8:1::2 )
+	$MZ -6 $h1.10 -a own -b 33:33:00:00:00:01 -c 1 -A fe80::1 -B ff0e::1 \
+		-t ip hop=1,next=0,p="$p" -q
+
+	bridge -d mdb get dev br0 grp ff0e::1 vid 10 | grep -q 2001:db8:1::2
+	check_err $? "Source not add to source list"
+
+	bridge mdb get dev br0 grp ff0e::1 src 2001:db8:1::2 vid 10 &> /dev/null
+	check_err $? "(S, G) entry not created for new source"
+
+	bridge mdb del dev br0 port $swp1 grp ff0e::1 vid 10
+
+	log_test "MLDv2 MODE_IS_INCLUDE tests"
+}
+
+ctrl_test()
+{
+	echo
+	log_info "# Control packets tests"
+
+	ctrl_igmpv3_is_in_test
+	ctrl_mldv2_is_in_test
+}
+
+if ! bridge mdb help 2>&1 | grep -q "flush"; then
+	echo "SKIP: iproute2 too old, missing bridge mdb flush support"
+	exit $ksft_skip
+fi
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb_host.sh b/tools/testing/selftests/net/forwarding/bridge_mdb_host.sh
new file mode 100755
index 000000000000..b1ba6876dd86
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb_host.sh
@@ -0,0 +1,103 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Verify that adding host mdb entries work as intended for all types of
+# multicast filters: ipv4, ipv6, and mac
+
+ALL_TESTS="mdb_add_del_test"
+NUM_NETIFS=2
+
+TEST_GROUP_IP4="225.1.2.3"
+TEST_GROUP_IP6="ff02::42"
+TEST_GROUP_MAC="01:00:01:c0:ff:ee"
+
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+switch_create()
+{
+	# Enable multicast filtering
+	ip link add dev br0 type bridge mcast_snooping 1
+
+	ip link set dev $swp1 master br0
+
+	ip link set dev br0 up
+	ip link set dev $swp1 up
+}
+
+switch_destroy()
+{
+	ip link set dev $swp1 down
+	ip link del dev br0
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	vrf_prepare
+
+	h1_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+do_mdb_add_del()
+{
+	local group=$1
+	local flag=$2
+
+	RET=0
+	bridge mdb add dev br0 port br0 grp $group $flag 2>/dev/null
+	check_err $? "Failed adding $group to br0, port br0"
+
+	if [ -z "$flag" ]; then
+	    flag="temp"
+	fi
+
+	bridge mdb show dev br0 | grep $group | grep -q $flag 2>/dev/null
+	check_err $? "$group not added with $flag flag"
+
+	bridge mdb del dev br0 port br0 grp $group 2>/dev/null
+	check_err $? "Failed deleting $group from br0, port br0"
+
+	bridge mdb show dev br0 | grep -q $group >/dev/null
+	check_err_fail 1 $? "$group still in mdb after delete"
+
+	log_test "MDB add/del group $group to bridge port br0"
+}
+
+mdb_add_del_test()
+{
+	do_mdb_add_del $TEST_GROUP_MAC permanent
+	do_mdb_add_del $TEST_GROUP_IP4
+	do_mdb_add_del $TEST_GROUP_IP6
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh b/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh
new file mode 100755
index 000000000000..3da9d93ab36f
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh
@@ -0,0 +1,1347 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------+                          +------------------------+
+# | H1 (vrf)              |                          | H2 (vrf)               |
+# | + $h1.10              |                          | + $h2.10               |
+# | | 192.0.2.1/28        |                          | | 192.0.2.2/28         |
+# | | 2001:db8:1::1/64    |                          | | 2001:db8:1::2/64     |
+# | |                     |                          | |                      |
+# | |  + $h1.20           |                          | |  + $h2.20            |
+# | \  | 198.51.100.1/24  |                          | \  | 198.51.100.2/24   |
+# |  \ | 2001:db8:2::1/64 |                          |  \ | 2001:db8:2::2/64  |
+# |   \|                  |                          |   \|                   |
+# |    + $h1              |                          |    + $h2               |
+# +----|------------------+                          +----|-------------------+
+#      |                                                  |
+# +----|--------------------------------------------------|-------------------+
+# | SW |                                                  |                   |
+# | +--|--------------------------------------------------|-----------------+ |
+# | |  + $swp1                   BR0 (802.1q)             + $swp2           | |
+# | |     vid 10                                             vid 10         | |
+# | |     vid 20                                             vid 20         | |
+# | |                                                                       | |
+# | +-----------------------------------------------------------------------+ |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+	test_8021d
+	test_8021q
+	test_8021qvs
+"
+
+NUM_NETIFS=4
+source lib.sh
+source tc_common.sh
+
+h1_create()
+{
+	simple_if_init $h1
+	vlan_create $h1 10 v$h1 192.0.2.1/28 2001:db8:1::1/64
+	vlan_create $h1 20 v$h1 198.51.100.1/24 2001:db8:2::1/64
+}
+
+h1_destroy()
+{
+	vlan_destroy $h1 20
+	vlan_destroy $h1 10
+	simple_if_fini $h1
+}
+
+h2_create()
+{
+	simple_if_init $h2
+	vlan_create $h2 10 v$h2 192.0.2.2/28
+	vlan_create $h2 20 v$h2 198.51.100.2/24
+}
+
+h2_destroy()
+{
+	vlan_destroy $h2 20
+	vlan_destroy $h2 10
+	simple_if_fini $h2
+}
+
+switch_create_8021d()
+{
+	log_info "802.1d tests"
+
+	ip link add name br0 type bridge vlan_filtering 0 \
+		mcast_snooping 1 \
+		mcast_igmp_version 3 mcast_mld_version 2
+	ip link set dev br0 up
+
+	ip link set dev $swp1 master br0
+	ip link set dev $swp1 up
+	bridge link set dev $swp1 fastleave on
+
+	ip link set dev $swp2 master br0
+	ip link set dev $swp2 up
+}
+
+switch_create_8021q()
+{
+	local br_flags=$1; shift
+
+	log_info "802.1q $br_flags${br_flags:+ }tests"
+
+	ip link add name br0 type bridge vlan_filtering 1 vlan_default_pvid 0 \
+		mcast_snooping 1 $br_flags \
+		mcast_igmp_version 3 mcast_mld_version 2
+	bridge vlan add vid 10 dev br0 self
+	bridge vlan add vid 20 dev br0 self
+	ip link set dev br0 up
+
+	ip link set dev $swp1 master br0
+	ip link set dev $swp1 up
+	bridge link set dev $swp1 fastleave on
+	bridge vlan add vid 10 dev $swp1
+	bridge vlan add vid 20 dev $swp1
+
+	ip link set dev $swp2 master br0
+	ip link set dev $swp2 up
+	bridge vlan add vid 10 dev $swp2
+	bridge vlan add vid 20 dev $swp2
+}
+
+switch_create_8021qvs()
+{
+	switch_create_8021q "mcast_vlan_snooping 1"
+	bridge vlan global set dev br0 vid 10 mcast_igmp_version 3
+	bridge vlan global set dev br0 vid 10 mcast_mld_version 2
+	bridge vlan global set dev br0 vid 20 mcast_igmp_version 3
+	bridge vlan global set dev br0 vid 20 mcast_mld_version 2
+}
+
+switch_destroy()
+{
+	ip link set dev $swp2 down
+	ip link set dev $swp2 nomaster
+
+	ip link set dev $swp1 down
+	ip link set dev $swp1 nomaster
+
+	ip link set dev br0 down
+	ip link del dev br0
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+	forwarding_enable
+
+	h1_create
+	h2_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy 2>/dev/null
+	h2_destroy
+	h1_destroy
+
+	forwarding_restore
+	vrf_cleanup
+}
+
+cfg_src_list()
+{
+	local IPs=("$@")
+	local IPstr=$(echo ${IPs[@]} | tr '[:space:]' , | sed 's/,$//')
+
+	echo ${IPstr:+source_list }${IPstr}
+}
+
+cfg_group_op()
+{
+	local op=$1; shift
+	local locus=$1; shift
+	local GRP=$1; shift
+	local state=$1; shift
+	local IPs=("$@")
+
+	local source_list=$(cfg_src_list ${IPs[@]})
+
+	# Everything besides `bridge mdb' uses the "dev X vid Y" syntax,
+	# so we use it here as well and convert.
+	local br_locus=$(echo "$locus" | sed 's/^dev /port /')
+
+	bridge mdb $op dev br0 $br_locus grp $GRP $state \
+	       filter_mode include $source_list
+}
+
+cfg4_entries_op()
+{
+	local op=$1; shift
+	local locus=$1; shift
+	local state=$1; shift
+	local n=$1; shift
+	local grp=${1:-1}; shift
+
+	local GRP=239.1.1.${grp}
+	local IPs=$(seq -f 192.0.2.%g 1 $((n - 1)))
+	cfg_group_op "$op" "$locus" "$GRP" "$state" ${IPs[@]}
+}
+
+cfg4_entries_add()
+{
+	cfg4_entries_op add "$@"
+}
+
+cfg4_entries_del()
+{
+	cfg4_entries_op del "$@"
+}
+
+cfg6_entries_op()
+{
+	local op=$1; shift
+	local locus=$1; shift
+	local state=$1; shift
+	local n=$1; shift
+	local grp=${1:-1}; shift
+
+	local GRP=ff0e::${grp}
+	local IPs=$(printf "2001:db8:1::%x\n" $(seq 1 $((n - 1))))
+	cfg_group_op "$op" "$locus" "$GRP" "$state" ${IPs[@]}
+}
+
+cfg6_entries_add()
+{
+	cfg6_entries_op add "$@"
+}
+
+cfg6_entries_del()
+{
+	cfg6_entries_op del "$@"
+}
+
+locus_dev_peer()
+{
+	local dev_kw=$1; shift
+	local dev=$1; shift
+	local vid_kw=$1; shift
+	local vid=$1; shift
+
+	echo "$h1.${vid:-10}"
+}
+
+locus_dev()
+{
+	local dev_kw=$1; shift
+	local dev=$1; shift
+
+	echo $dev
+}
+
+ctl4_entries_add()
+{
+	local locus=$1; shift
+	local state=$1; shift
+	local n=$1; shift
+	local grp=${1:-1}; shift
+
+	local IPs=$(seq -f 192.0.2.%g 1 $((n - 1)))
+	local peer=$(locus_dev_peer $locus)
+	local GRP=239.1.1.${grp}
+	local dmac=01:00:5e:01:01:$(printf "%02x" $grp)
+	$MZ $peer -a own -b $dmac -c 1 -A 192.0.2.1 -B $GRP \
+		-t ip proto=2,p=$(igmpv3_is_in_get $GRP $IPs) -q
+	sleep 1
+
+	local nn=$(bridge mdb show dev br0 | grep $GRP | wc -l)
+	if ((nn != n)); then
+		echo mcast_max_groups > /dev/stderr
+		false
+	fi
+}
+
+ctl4_entries_del()
+{
+	local locus=$1; shift
+	local state=$1; shift
+	local n=$1; shift
+	local grp=${1:-1}; shift
+
+	local peer=$(locus_dev_peer $locus)
+	local GRP=239.1.1.${grp}
+	local dmac=01:00:5e:00:00:02
+	$MZ $peer -a own -b $dmac -c 1 -A 192.0.2.1 -B 224.0.0.2 \
+		-t ip proto=2,p=$(igmpv2_leave_get $GRP) -q
+	sleep 1
+	! bridge mdb show dev br0 | grep -q $GRP
+}
+
+ctl6_entries_add()
+{
+	local locus=$1; shift
+	local state=$1; shift
+	local n=$1; shift
+	local grp=${1:-1}; shift
+
+	local IPs=$(printf "2001:db8:1::%x\n" $(seq 1 $((n - 1))))
+	local peer=$(locus_dev_peer $locus)
+	local SIP=fe80::1
+	local GRP=ff0e::${grp}
+	local dmac=33:33:00:00:00:$(printf "%02x" $grp)
+	local p=$(mldv2_is_in_get $SIP $GRP $IPs)
+	$MZ -6 $peer -a own -b $dmac -c 1 -A $SIP -B $GRP \
+		-t ip hop=1,next=0,p="$p" -q
+	sleep 1
+
+	local nn=$(bridge mdb show dev br0 | grep $GRP | wc -l)
+	if ((nn != n)); then
+		echo mcast_max_groups > /dev/stderr
+		false
+	fi
+}
+
+ctl6_entries_del()
+{
+	local locus=$1; shift
+	local state=$1; shift
+	local n=$1; shift
+	local grp=${1:-1}; shift
+
+	local peer=$(locus_dev_peer $locus)
+	local SIP=fe80::1
+	local GRP=ff0e::${grp}
+	local dmac=33:33:00:00:00:$(printf "%02x" $grp)
+	local p=$(mldv1_done_get $SIP $GRP)
+	$MZ -6 $peer -a own -b $dmac -c 1 -A $SIP -B $GRP \
+		-t ip hop=1,next=0,p="$p" -q
+	sleep 1
+	! bridge mdb show dev br0 | grep -q $GRP
+}
+
+bridge_maxgroups_errmsg_check_cfg()
+{
+	local msg=$1; shift
+	local needle=$1; shift
+
+	echo "$msg" | grep -q mcast_max_groups
+	check_err $? "Adding MDB entries failed for the wrong reason: $msg"
+}
+
+bridge_maxgroups_errmsg_check_cfg4()
+{
+	bridge_maxgroups_errmsg_check_cfg "$@"
+}
+
+bridge_maxgroups_errmsg_check_cfg6()
+{
+	bridge_maxgroups_errmsg_check_cfg "$@"
+}
+
+bridge_maxgroups_errmsg_check_ctl4()
+{
+	:
+}
+
+bridge_maxgroups_errmsg_check_ctl6()
+{
+	:
+}
+
+bridge_port_ngroups_get()
+{
+	local locus=$1; shift
+
+	bridge -j -d link show $locus |
+	    jq '.[].mcast_n_groups'
+}
+
+bridge_port_maxgroups_get()
+{
+	local locus=$1; shift
+
+	bridge -j -d link show $locus |
+	    jq '.[].mcast_max_groups'
+}
+
+bridge_port_maxgroups_set()
+{
+	local locus=$1; shift
+	local max=$1; shift
+
+	bridge link set dev $(locus_dev $locus) mcast_max_groups $max
+}
+
+bridge_port_vlan_ngroups_get()
+{
+	local locus=$1; shift
+
+	bridge -j -d vlan show $locus |
+	    jq '.[].vlans[].mcast_n_groups'
+}
+
+bridge_port_vlan_maxgroups_get()
+{
+	local locus=$1; shift
+
+	bridge -j -d vlan show $locus |
+	    jq '.[].vlans[].mcast_max_groups'
+}
+
+bridge_port_vlan_maxgroups_set()
+{
+	local locus=$1; shift
+	local max=$1; shift
+
+	bridge vlan set $locus mcast_max_groups $max
+}
+
+test_ngroups_reporting()
+{
+	local CFG=$1; shift
+	local context=$1; shift
+	local locus=$1; shift
+
+	RET=0
+
+	local n0=$(bridge_${context}_ngroups_get "$locus")
+	${CFG}_entries_add "$locus" temp 5
+	check_err $? "Couldn't add MDB entries"
+	local n1=$(bridge_${context}_ngroups_get "$locus")
+
+	((n1 == n0 + 5))
+	check_err $? "Number of groups was $n0, now is $n1, but $((n0 + 5)) expected"
+
+	${CFG}_entries_del "$locus" temp 5
+	check_err $? "Couldn't delete MDB entries"
+	local n2=$(bridge_${context}_ngroups_get "$locus")
+
+	((n2 == n0))
+	check_err $? "Number of groups was $n0, now is $n2, but should be back to $n0"
+
+	log_test "$CFG: $context: ngroups reporting"
+}
+
+test_8021d_ngroups_reporting_cfg4()
+{
+	test_ngroups_reporting cfg4 port "dev $swp1"
+}
+
+test_8021d_ngroups_reporting_ctl4()
+{
+	test_ngroups_reporting ctl4 port "dev $swp1"
+}
+
+test_8021d_ngroups_reporting_cfg6()
+{
+	test_ngroups_reporting cfg6 port "dev $swp1"
+}
+
+test_8021d_ngroups_reporting_ctl6()
+{
+	test_ngroups_reporting ctl6 port "dev $swp1"
+}
+
+test_8021q_ngroups_reporting_cfg4()
+{
+	test_ngroups_reporting cfg4 port "dev $swp1 vid 10"
+}
+
+test_8021q_ngroups_reporting_ctl4()
+{
+	test_ngroups_reporting ctl4 port "dev $swp1 vid 10"
+}
+
+test_8021q_ngroups_reporting_cfg6()
+{
+	test_ngroups_reporting cfg6 port "dev $swp1 vid 10"
+}
+
+test_8021q_ngroups_reporting_ctl6()
+{
+	test_ngroups_reporting ctl6 port "dev $swp1 vid 10"
+}
+
+test_8021qvs_ngroups_reporting_cfg4()
+{
+	test_ngroups_reporting cfg4 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_ngroups_reporting_ctl4()
+{
+	test_ngroups_reporting ctl4 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_ngroups_reporting_cfg6()
+{
+	test_ngroups_reporting cfg6 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_ngroups_reporting_ctl6()
+{
+	test_ngroups_reporting ctl6 port_vlan "dev $swp1 vid 10"
+}
+
+test_ngroups_cross_vlan()
+{
+	local CFG=$1; shift
+
+	local locus1="dev $swp1 vid 10"
+	local locus2="dev $swp1 vid 20"
+
+	RET=0
+
+	local n10=$(bridge_port_vlan_ngroups_get "$locus1")
+	local n20=$(bridge_port_vlan_ngroups_get "$locus2")
+	${CFG}_entries_add "$locus1" temp 5 111
+	check_err $? "Couldn't add MDB entries to VLAN 10"
+	local n11=$(bridge_port_vlan_ngroups_get "$locus1")
+	local n21=$(bridge_port_vlan_ngroups_get "$locus2")
+
+	((n11 == n10 + 5))
+	check_err $? "Number of groups at VLAN 10 was $n10, now is $n11, but 5 entries added on VLAN 10, $((n10 + 5)) expected"
+
+	((n21 == n20))
+	check_err $? "Number of groups at VLAN 20 was $n20, now is $n21, but no change expected on VLAN 20"
+
+	${CFG}_entries_add "$locus2" temp 5 112
+	check_err $? "Couldn't add MDB entries to VLAN 20"
+	local n12=$(bridge_port_vlan_ngroups_get "$locus1")
+	local n22=$(bridge_port_vlan_ngroups_get "$locus2")
+
+	((n12 == n11))
+	check_err $? "Number of groups at VLAN 10 was $n11, now is $n12, but no change expected on VLAN 10"
+
+	((n22 == n21 + 5))
+	check_err $? "Number of groups at VLAN 20 was $n21, now is $n22, but 5 entries added on VLAN 20, $((n21 + 5)) expected"
+
+	${CFG}_entries_del "$locus1" temp 5 111
+	check_err $? "Couldn't delete MDB entries from VLAN 10"
+	${CFG}_entries_del "$locus2" temp 5 112
+	check_err $? "Couldn't delete MDB entries from VLAN 20"
+	local n13=$(bridge_port_vlan_ngroups_get "$locus1")
+	local n23=$(bridge_port_vlan_ngroups_get "$locus2")
+
+	((n13 == n10))
+	check_err $? "Number of groups at VLAN 10 was $n10, now is $n13, but should be back to $n10"
+
+	((n23 == n20))
+	check_err $? "Number of groups at VLAN 20 was $n20, now is $n23, but should be back to $n20"
+
+	log_test "$CFG: port_vlan: isolation of port and per-VLAN ngroups"
+}
+
+test_8021qvs_ngroups_cross_vlan_cfg4()
+{
+	test_ngroups_cross_vlan cfg4
+}
+
+test_8021qvs_ngroups_cross_vlan_ctl4()
+{
+	test_ngroups_cross_vlan ctl4
+}
+
+test_8021qvs_ngroups_cross_vlan_cfg6()
+{
+	test_ngroups_cross_vlan cfg6
+}
+
+test_8021qvs_ngroups_cross_vlan_ctl6()
+{
+	test_ngroups_cross_vlan ctl6
+}
+
+test_maxgroups_zero()
+{
+	local CFG=$1; shift
+	local context=$1; shift
+	local locus=$1; shift
+
+	RET=0
+	local max
+
+	max=$(bridge_${context}_maxgroups_get "$locus")
+	((max == 0))
+	check_err $? "Max groups on $locus should be 0, but $max reported"
+
+	bridge_${context}_maxgroups_set "$locus" 100
+	check_err $? "Failed to set max to 100"
+	max=$(bridge_${context}_maxgroups_get "$locus")
+	((max == 100))
+	check_err $? "Max groups expected to be 100, but $max reported"
+
+	bridge_${context}_maxgroups_set "$locus" 0
+	check_err $? "Couldn't set maximum to 0"
+
+	# Test that setting 0 explicitly still serves as infinity.
+	${CFG}_entries_add "$locus" temp 5
+	check_err $? "Adding 5 MDB entries failed but should have passed"
+	${CFG}_entries_del "$locus" temp 5
+	check_err $? "Couldn't delete MDB entries"
+
+	log_test "$CFG: $context maxgroups: reporting and treatment of 0"
+}
+
+test_8021d_maxgroups_zero_cfg4()
+{
+	test_maxgroups_zero cfg4 port "dev $swp1"
+}
+
+test_8021d_maxgroups_zero_ctl4()
+{
+	test_maxgroups_zero ctl4 port "dev $swp1"
+}
+
+test_8021d_maxgroups_zero_cfg6()
+{
+	test_maxgroups_zero cfg6 port "dev $swp1"
+}
+
+test_8021d_maxgroups_zero_ctl6()
+{
+	test_maxgroups_zero ctl6 port "dev $swp1"
+}
+
+test_8021q_maxgroups_zero_cfg4()
+{
+	test_maxgroups_zero cfg4 port "dev $swp1 vid 10"
+}
+
+test_8021q_maxgroups_zero_ctl4()
+{
+	test_maxgroups_zero ctl4 port "dev $swp1 vid 10"
+}
+
+test_8021q_maxgroups_zero_cfg6()
+{
+	test_maxgroups_zero cfg6 port "dev $swp1 vid 10"
+}
+
+test_8021q_maxgroups_zero_ctl6()
+{
+	test_maxgroups_zero ctl6 port "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_zero_cfg4()
+{
+	test_maxgroups_zero cfg4 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_zero_ctl4()
+{
+	test_maxgroups_zero ctl4 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_zero_cfg6()
+{
+	test_maxgroups_zero cfg6 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_zero_ctl6()
+{
+	test_maxgroups_zero ctl6 port_vlan "dev $swp1 vid 10"
+}
+
+test_maxgroups_zero_cross_vlan()
+{
+	local CFG=$1; shift
+
+	local locus0="dev $swp1"
+	local locus1="dev $swp1 vid 10"
+	local locus2="dev $swp1 vid 20"
+	local max
+
+	RET=0
+
+	bridge_port_vlan_maxgroups_set "$locus1" 100
+	check_err $? "$locus1: Failed to set max to 100"
+
+	max=$(bridge_port_maxgroups_get "$locus0")
+	((max == 0))
+	check_err $? "$locus0: Max groups expected to be 0, but $max reported"
+
+	max=$(bridge_port_vlan_maxgroups_get "$locus2")
+	((max == 0))
+	check_err $? "$locus2: Max groups expected to be 0, but $max reported"
+
+	bridge_port_vlan_maxgroups_set "$locus2" 100
+	check_err $? "$locus2: Failed to set max to 100"
+
+	max=$(bridge_port_maxgroups_get "$locus0")
+	((max == 0))
+	check_err $? "$locus0: Max groups expected to be 0, but $max reported"
+
+	max=$(bridge_port_vlan_maxgroups_get "$locus2")
+	((max == 100))
+	check_err $? "$locus2: Max groups expected to be 100, but $max reported"
+
+	bridge_port_maxgroups_set "$locus0" 100
+	check_err $? "$locus0: Failed to set max to 100"
+
+	max=$(bridge_port_maxgroups_get "$locus0")
+	((max == 100))
+	check_err $? "$locus0: Max groups expected to be 100, but $max reported"
+
+	max=$(bridge_port_vlan_maxgroups_get "$locus2")
+	((max == 100))
+	check_err $? "$locus2: Max groups expected to be 100, but $max reported"
+
+	bridge_port_vlan_maxgroups_set "$locus1" 0
+	check_err $? "$locus1: Failed to set max to 0"
+
+	max=$(bridge_port_maxgroups_get "$locus0")
+	((max == 100))
+	check_err $? "$locus0: Max groups expected to be 100, but $max reported"
+
+	max=$(bridge_port_vlan_maxgroups_get "$locus2")
+	((max == 100))
+	check_err $? "$locus2: Max groups expected to be 100, but $max reported"
+
+	bridge_port_vlan_maxgroups_set "$locus2" 0
+	check_err $? "$locus2: Failed to set max to 0"
+
+	max=$(bridge_port_maxgroups_get "$locus0")
+	((max == 100))
+	check_err $? "$locus0: Max groups expected to be 100, but $max reported"
+
+	max=$(bridge_port_vlan_maxgroups_get "$locus2")
+	((max == 0))
+	check_err $? "$locus2: Max groups expected to be 0 but $max reported"
+
+	bridge_port_maxgroups_set "$locus0" 0
+	check_err $? "$locus0: Failed to set max to 0"
+
+	max=$(bridge_port_maxgroups_get "$locus0")
+	((max == 0))
+	check_err $? "$locus0: Max groups expected to be 0, but $max reported"
+
+	max=$(bridge_port_vlan_maxgroups_get "$locus2")
+	((max == 0))
+	check_err $? "$locus2: Max groups expected to be 0, but $max reported"
+
+	log_test "$CFG: port_vlan maxgroups: isolation of port and per-VLAN maximums"
+}
+
+test_8021qvs_maxgroups_zero_cross_vlan_cfg4()
+{
+	test_maxgroups_zero_cross_vlan cfg4
+}
+
+test_8021qvs_maxgroups_zero_cross_vlan_ctl4()
+{
+	test_maxgroups_zero_cross_vlan ctl4
+}
+
+test_8021qvs_maxgroups_zero_cross_vlan_cfg6()
+{
+	test_maxgroups_zero_cross_vlan cfg6
+}
+
+test_8021qvs_maxgroups_zero_cross_vlan_ctl6()
+{
+	test_maxgroups_zero_cross_vlan ctl6
+}
+
+test_maxgroups_too_low()
+{
+	local CFG=$1; shift
+	local context=$1; shift
+	local locus=$1; shift
+
+	RET=0
+
+	local n=$(bridge_${context}_ngroups_get "$locus")
+	local msg
+
+	${CFG}_entries_add "$locus" temp 5 111
+	check_err $? "$locus: Couldn't add MDB entries"
+
+	bridge_${context}_maxgroups_set "$locus" $((n+2))
+	check_err $? "$locus: Setting maxgroups to $((n+2)) failed"
+
+	msg=$(${CFG}_entries_add "$locus" temp 2 112 2>&1)
+	check_fail $? "$locus: Adding more entries passed when max<n"
+	bridge_maxgroups_errmsg_check_cfg "$msg"
+
+	${CFG}_entries_del "$locus" temp 5 111
+	check_err $? "$locus: Couldn't delete MDB entries"
+
+	${CFG}_entries_add "$locus" temp 2 112
+	check_err $? "$locus: Adding more entries failed"
+
+	${CFG}_entries_del "$locus" temp 2 112
+	check_err $? "$locus: Deleting more entries failed"
+
+	bridge_${context}_maxgroups_set "$locus" 0
+	check_err $? "$locus: Couldn't set maximum to 0"
+
+	log_test "$CFG: $context maxgroups: configure below ngroups"
+}
+
+test_8021d_maxgroups_too_low_cfg4()
+{
+	test_maxgroups_too_low cfg4 port "dev $swp1"
+}
+
+test_8021d_maxgroups_too_low_ctl4()
+{
+	test_maxgroups_too_low ctl4 port "dev $swp1"
+}
+
+test_8021d_maxgroups_too_low_cfg6()
+{
+	test_maxgroups_too_low cfg6 port "dev $swp1"
+}
+
+test_8021d_maxgroups_too_low_ctl6()
+{
+	test_maxgroups_too_low ctl6 port "dev $swp1"
+}
+
+test_8021q_maxgroups_too_low_cfg4()
+{
+	test_maxgroups_too_low cfg4 port "dev $swp1 vid 10"
+}
+
+test_8021q_maxgroups_too_low_ctl4()
+{
+	test_maxgroups_too_low ctl4 port "dev $swp1 vid 10"
+}
+
+test_8021q_maxgroups_too_low_cfg6()
+{
+	test_maxgroups_too_low cfg6 port "dev $swp1 vid 10"
+}
+
+test_8021q_maxgroups_too_low_ctl6()
+{
+	test_maxgroups_too_low ctl6 port "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_too_low_cfg4()
+{
+	test_maxgroups_too_low cfg4 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_too_low_ctl4()
+{
+	test_maxgroups_too_low ctl4 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_too_low_cfg6()
+{
+	test_maxgroups_too_low cfg6 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_too_low_ctl6()
+{
+	test_maxgroups_too_low ctl6 port_vlan "dev $swp1 vid 10"
+}
+
+test_maxgroups_too_many_entries()
+{
+	local CFG=$1; shift
+	local context=$1; shift
+	local locus=$1; shift
+
+	RET=0
+
+	local n=$(bridge_${context}_ngroups_get "$locus")
+	local msg
+
+	# Configure a low maximum
+	bridge_${context}_maxgroups_set "$locus" $((n+1))
+	check_err $? "$locus: Couldn't set maximum"
+
+	# Try to add more entries than the configured maximum
+	msg=$(${CFG}_entries_add "$locus" temp 5 2>&1)
+	check_fail $? "Adding 5 MDB entries passed, but should have failed"
+	bridge_maxgroups_errmsg_check_${CFG} "$msg"
+
+	# When adding entries through the control path, as many as possible
+	# get created. That's consistent with the mcast_hash_max behavior.
+	# So there, drop the entries explicitly.
+	if [[ ${CFG%[46]} == ctl ]]; then
+		${CFG}_entries_del "$locus" temp 17 2>&1
+	fi
+
+	local n2=$(bridge_${context}_ngroups_get "$locus")
+	((n2 == n))
+	check_err $? "Number of groups was $n, but after a failed attempt to add MDB entries it changed to $n2"
+
+	bridge_${context}_maxgroups_set "$locus" 0
+	check_err $? "$locus: Couldn't set maximum to 0"
+
+	log_test "$CFG: $context maxgroups: add too many MDB entries"
+}
+
+test_8021d_maxgroups_too_many_entries_cfg4()
+{
+	test_maxgroups_too_many_entries cfg4 port "dev $swp1"
+}
+
+test_8021d_maxgroups_too_many_entries_ctl4()
+{
+	test_maxgroups_too_many_entries ctl4 port "dev $swp1"
+}
+
+test_8021d_maxgroups_too_many_entries_cfg6()
+{
+	test_maxgroups_too_many_entries cfg6 port "dev $swp1"
+}
+
+test_8021d_maxgroups_too_many_entries_ctl6()
+{
+	test_maxgroups_too_many_entries ctl6 port "dev $swp1"
+}
+
+test_8021q_maxgroups_too_many_entries_cfg4()
+{
+	test_maxgroups_too_many_entries cfg4 port "dev $swp1 vid 10"
+}
+
+test_8021q_maxgroups_too_many_entries_ctl4()
+{
+	test_maxgroups_too_many_entries ctl4 port "dev $swp1 vid 10"
+}
+
+test_8021q_maxgroups_too_many_entries_cfg6()
+{
+	test_maxgroups_too_many_entries cfg6 port "dev $swp1 vid 10"
+}
+
+test_8021q_maxgroups_too_many_entries_ctl6()
+{
+	test_maxgroups_too_many_entries ctl6 port "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_too_many_entries_cfg4()
+{
+	test_maxgroups_too_many_entries cfg4 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_too_many_entries_ctl4()
+{
+	test_maxgroups_too_many_entries ctl4 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_too_many_entries_cfg6()
+{
+	test_maxgroups_too_many_entries cfg6 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_too_many_entries_ctl6()
+{
+	test_maxgroups_too_many_entries ctl6 port_vlan "dev $swp1 vid 10"
+}
+
+test_maxgroups_too_many_cross_vlan()
+{
+	local CFG=$1; shift
+
+	RET=0
+
+	local locus0="dev $swp1"
+	local locus1="dev $swp1 vid 10"
+	local locus2="dev $swp1 vid 20"
+	local n1=$(bridge_port_vlan_ngroups_get "$locus1")
+	local n2=$(bridge_port_vlan_ngroups_get "$locus2")
+	local msg
+
+	if ((n1 > n2)); then
+		local tmp=$n1
+		n1=$n2
+		n2=$tmp
+
+		tmp="$locus1"
+		locus1="$locus2"
+		locus2="$tmp"
+	fi
+
+	# Now 0 <= n1 <= n2.
+	${CFG}_entries_add "$locus2" temp 5 112
+	check_err $? "Couldn't add 5 entries"
+
+	n2=$(bridge_port_vlan_ngroups_get "$locus2")
+	# Now 0 <= n1 < n2-1.
+
+	# Setting locus1'maxgroups to n2-1 should pass. The number is
+	# smaller than both the absolute number of MDB entries, and in
+	# particular than number of locus2's number of entries, but it is
+	# large enough to cover locus1's entries. Thus we check that
+	# individual VLAN's ngroups are independent.
+	bridge_port_vlan_maxgroups_set "$locus1" $((n2-1))
+	check_err $? "Setting ${locus1}'s maxgroups to $((n2-1)) failed"
+
+	msg=$(${CFG}_entries_add "$locus1" temp $n2 111 2>&1)
+	check_fail $? "$locus1: Adding $n2 MDB entries passed, but should have failed"
+	bridge_maxgroups_errmsg_check_${CFG} "$msg"
+
+	bridge_port_maxgroups_set "$locus0" $((n1 + n2 + 2))
+	check_err $? "$locus0: Couldn't set maximum"
+
+	msg=$(${CFG}_entries_add "$locus1" temp 5 111 2>&1)
+	check_fail $? "$locus1: Adding 5 MDB entries passed, but should have failed"
+	bridge_maxgroups_errmsg_check_${CFG} "$msg"
+
+	# IGMP/MLD packets can cause several entries to be added, before
+	# the maximum is hit and the rest is then bounced. Remove what was
+	# committed, if anything.
+	${CFG}_entries_del "$locus1" temp 5 111 2>/dev/null
+
+	${CFG}_entries_add "$locus1" temp 2 111
+	check_err $? "$locus1: Adding 2 MDB entries failed, but should have passed"
+
+	${CFG}_entries_del "$locus1" temp 2 111
+	check_err $? "Couldn't delete MDB entries"
+
+	${CFG}_entries_del "$locus2" temp 5 112
+	check_err $? "Couldn't delete MDB entries"
+
+	bridge_port_vlan_maxgroups_set "$locus1" 0
+	check_err $? "$locus1: Couldn't set maximum to 0"
+
+	bridge_port_maxgroups_set "$locus0" 0
+	check_err $? "$locus0: Couldn't set maximum to 0"
+
+	log_test "$CFG: port_vlan maxgroups: isolation of port and per-VLAN ngroups"
+}
+
+test_8021qvs_maxgroups_too_many_cross_vlan_cfg4()
+{
+	test_maxgroups_too_many_cross_vlan cfg4
+}
+
+test_8021qvs_maxgroups_too_many_cross_vlan_ctl4()
+{
+	test_maxgroups_too_many_cross_vlan ctl4
+}
+
+test_8021qvs_maxgroups_too_many_cross_vlan_cfg6()
+{
+	test_maxgroups_too_many_cross_vlan cfg6
+}
+
+test_8021qvs_maxgroups_too_many_cross_vlan_ctl6()
+{
+	test_maxgroups_too_many_cross_vlan ctl6
+}
+
+test_vlan_attributes()
+{
+	local locus=$1; shift
+	local expect=$1; shift
+
+	RET=0
+
+	local max=$(bridge_port_vlan_maxgroups_get "$locus")
+	local n=$(bridge_port_vlan_ngroups_get "$locus")
+
+	eval "[[ $max $expect ]]"
+	check_err $? "$locus: maxgroups attribute expected to be $expect, but was $max"
+
+	eval "[[ $n $expect ]]"
+	check_err $? "$locus: ngroups attribute expected to be $expect, but was $n"
+
+	log_test "port_vlan: presence of ngroups and maxgroups attributes"
+}
+
+test_8021q_vlan_attributes()
+{
+	test_vlan_attributes "dev $swp1 vid 10" "== null"
+}
+
+test_8021qvs_vlan_attributes()
+{
+	test_vlan_attributes "dev $swp1 vid 10" "-ge 0"
+}
+
+test_toggle_vlan_snooping()
+{
+	local mode=$1; shift
+
+	RET=0
+
+	local CFG=cfg4
+	local context=port_vlan
+	local locus="dev $swp1 vid 10"
+
+	${CFG}_entries_add "$locus" $mode 5
+	check_err $? "Couldn't add MDB entries"
+
+	bridge_${context}_maxgroups_set "$locus" 100
+	check_err $? "Failed to set max to 100"
+
+	ip link set dev br0 type bridge mcast_vlan_snooping 0
+	sleep 1
+	ip link set dev br0 type bridge mcast_vlan_snooping 1
+
+	local n=$(bridge_${context}_ngroups_get "$locus")
+	local nn=$(bridge mdb show dev br0 | grep $swp1 | wc -l)
+	((nn == n))
+	check_err $? "mcast_n_groups expected to be $nn, but $n reported"
+
+	local max=$(bridge_${context}_maxgroups_get "$locus")
+	((max == 100))
+	check_err $? "Max groups expected to be 100 but $max reported"
+
+	bridge_${context}_maxgroups_set "$locus" 0
+	check_err $? "Failed to set max to 0"
+
+	log_test "$CFG: $context: $mode: mcast_vlan_snooping toggle"
+}
+
+test_toggle_vlan_snooping_temp()
+{
+	test_toggle_vlan_snooping temp
+}
+
+test_toggle_vlan_snooping_permanent()
+{
+	test_toggle_vlan_snooping permanent
+}
+
+# ngroup test suites
+
+test_8021d_ngroups_cfg4()
+{
+	test_8021d_ngroups_reporting_cfg4
+}
+
+test_8021d_ngroups_ctl4()
+{
+	test_8021d_ngroups_reporting_ctl4
+}
+
+test_8021d_ngroups_cfg6()
+{
+	test_8021d_ngroups_reporting_cfg6
+}
+
+test_8021d_ngroups_ctl6()
+{
+	test_8021d_ngroups_reporting_ctl6
+}
+
+test_8021q_ngroups_cfg4()
+{
+	test_8021q_ngroups_reporting_cfg4
+}
+
+test_8021q_ngroups_ctl4()
+{
+	test_8021q_ngroups_reporting_ctl4
+}
+
+test_8021q_ngroups_cfg6()
+{
+	test_8021q_ngroups_reporting_cfg6
+}
+
+test_8021q_ngroups_ctl6()
+{
+	test_8021q_ngroups_reporting_ctl6
+}
+
+test_8021qvs_ngroups_cfg4()
+{
+	test_8021qvs_ngroups_reporting_cfg4
+	test_8021qvs_ngroups_cross_vlan_cfg4
+}
+
+test_8021qvs_ngroups_ctl4()
+{
+	test_8021qvs_ngroups_reporting_ctl4
+	test_8021qvs_ngroups_cross_vlan_ctl4
+}
+
+test_8021qvs_ngroups_cfg6()
+{
+	test_8021qvs_ngroups_reporting_cfg6
+	test_8021qvs_ngroups_cross_vlan_cfg6
+}
+
+test_8021qvs_ngroups_ctl6()
+{
+	test_8021qvs_ngroups_reporting_ctl6
+	test_8021qvs_ngroups_cross_vlan_ctl6
+}
+
+# maxgroups test suites
+
+test_8021d_maxgroups_cfg4()
+{
+	test_8021d_maxgroups_zero_cfg4
+	test_8021d_maxgroups_too_low_cfg4
+	test_8021d_maxgroups_too_many_entries_cfg4
+}
+
+test_8021d_maxgroups_ctl4()
+{
+	test_8021d_maxgroups_zero_ctl4
+	test_8021d_maxgroups_too_low_ctl4
+	test_8021d_maxgroups_too_many_entries_ctl4
+}
+
+test_8021d_maxgroups_cfg6()
+{
+	test_8021d_maxgroups_zero_cfg6
+	test_8021d_maxgroups_too_low_cfg6
+	test_8021d_maxgroups_too_many_entries_cfg6
+}
+
+test_8021d_maxgroups_ctl6()
+{
+	test_8021d_maxgroups_zero_ctl6
+	test_8021d_maxgroups_too_low_ctl6
+	test_8021d_maxgroups_too_many_entries_ctl6
+}
+
+test_8021q_maxgroups_cfg4()
+{
+	test_8021q_maxgroups_zero_cfg4
+	test_8021q_maxgroups_too_low_cfg4
+	test_8021q_maxgroups_too_many_entries_cfg4
+}
+
+test_8021q_maxgroups_ctl4()
+{
+	test_8021q_maxgroups_zero_ctl4
+	test_8021q_maxgroups_too_low_ctl4
+	test_8021q_maxgroups_too_many_entries_ctl4
+}
+
+test_8021q_maxgroups_cfg6()
+{
+	test_8021q_maxgroups_zero_cfg6
+	test_8021q_maxgroups_too_low_cfg6
+	test_8021q_maxgroups_too_many_entries_cfg6
+}
+
+test_8021q_maxgroups_ctl6()
+{
+	test_8021q_maxgroups_zero_ctl6
+	test_8021q_maxgroups_too_low_ctl6
+	test_8021q_maxgroups_too_many_entries_ctl6
+}
+
+test_8021qvs_maxgroups_cfg4()
+{
+	test_8021qvs_maxgroups_zero_cfg4
+	test_8021qvs_maxgroups_zero_cross_vlan_cfg4
+	test_8021qvs_maxgroups_too_low_cfg4
+	test_8021qvs_maxgroups_too_many_entries_cfg4
+	test_8021qvs_maxgroups_too_many_cross_vlan_cfg4
+}
+
+test_8021qvs_maxgroups_ctl4()
+{
+	test_8021qvs_maxgroups_zero_ctl4
+	test_8021qvs_maxgroups_zero_cross_vlan_ctl4
+	test_8021qvs_maxgroups_too_low_ctl4
+	test_8021qvs_maxgroups_too_many_entries_ctl4
+	test_8021qvs_maxgroups_too_many_cross_vlan_ctl4
+}
+
+test_8021qvs_maxgroups_cfg6()
+{
+	test_8021qvs_maxgroups_zero_cfg6
+	test_8021qvs_maxgroups_zero_cross_vlan_cfg6
+	test_8021qvs_maxgroups_too_low_cfg6
+	test_8021qvs_maxgroups_too_many_entries_cfg6
+	test_8021qvs_maxgroups_too_many_cross_vlan_cfg6
+}
+
+test_8021qvs_maxgroups_ctl6()
+{
+	test_8021qvs_maxgroups_zero_ctl6
+	test_8021qvs_maxgroups_zero_cross_vlan_ctl6
+	test_8021qvs_maxgroups_too_low_ctl6
+	test_8021qvs_maxgroups_too_many_entries_ctl6
+	test_8021qvs_maxgroups_too_many_cross_vlan_ctl6
+}
+
+# other test suites
+
+test_8021qvs_toggle_vlan_snooping()
+{
+	test_toggle_vlan_snooping_temp
+	test_toggle_vlan_snooping_permanent
+}
+
+# test groups
+
+test_8021d()
+{
+	# Tests for vlan_filtering 0 mcast_vlan_snooping 0.
+
+	switch_create_8021d
+	setup_wait
+
+	test_8021d_ngroups_cfg4
+	test_8021d_ngroups_ctl4
+	test_8021d_ngroups_cfg6
+	test_8021d_ngroups_ctl6
+	test_8021d_maxgroups_cfg4
+	test_8021d_maxgroups_ctl4
+	test_8021d_maxgroups_cfg6
+	test_8021d_maxgroups_ctl6
+
+	switch_destroy
+}
+
+test_8021q()
+{
+	# Tests for vlan_filtering 1 mcast_vlan_snooping 0.
+
+	switch_create_8021q
+	setup_wait
+
+	test_8021q_vlan_attributes
+	test_8021q_ngroups_cfg4
+	test_8021q_ngroups_ctl4
+	test_8021q_ngroups_cfg6
+	test_8021q_ngroups_ctl6
+	test_8021q_maxgroups_cfg4
+	test_8021q_maxgroups_ctl4
+	test_8021q_maxgroups_cfg6
+	test_8021q_maxgroups_ctl6
+
+	switch_destroy
+}
+
+test_8021qvs()
+{
+	# Tests for vlan_filtering 1 mcast_vlan_snooping 1.
+
+	switch_create_8021qvs
+	setup_wait
+
+	test_8021qvs_vlan_attributes
+	test_8021qvs_ngroups_cfg4
+	test_8021qvs_ngroups_ctl4
+	test_8021qvs_ngroups_cfg6
+	test_8021qvs_ngroups_ctl6
+	test_8021qvs_maxgroups_cfg4
+	test_8021qvs_maxgroups_ctl4
+	test_8021qvs_maxgroups_cfg6
+	test_8021qvs_maxgroups_ctl6
+	test_8021qvs_toggle_vlan_snooping
+
+	switch_destroy
+}
+
+if ! bridge link help 2>&1 | grep -q "mcast_max_groups"; then
+	echo "SKIP: iproute2 too old, missing bridge \"mcast_max_groups\" support"
+	exit $ksft_skip
+fi
+
+trap cleanup EXIT
+
+setup_prepare
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb_port_down.sh b/tools/testing/selftests/net/forwarding/bridge_mdb_port_down.sh
new file mode 100755
index 000000000000..1a0480e71d83
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb_port_down.sh
@@ -0,0 +1,118 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Verify that permanent mdb entries can be added to and deleted from bridge
+# interfaces that are down, and works correctly when done so.
+
+ALL_TESTS="add_del_to_port_down"
+NUM_NETIFS=4
+
+TEST_GROUP="239.10.10.10"
+TEST_GROUP_MAC="01:00:5e:0a:0a:0a"
+
+source lib.sh
+
+
+add_del_to_port_down() {
+	RET=0
+
+	ip link set dev $swp2 down
+	bridge mdb add dev br0 port "$swp2" grp $TEST_GROUP permanent 2>/dev/null
+	check_err $? "Failed adding mdb entry"
+
+	ip link set dev $swp2 up
+	setup_wait_dev $swp2
+	mcast_packet_test $TEST_GROUP_MAC 192.0.2.1 $TEST_GROUP $h1 $h2
+	check_fail $? "Traffic to $TEST_GROUP wasn't forwarded"
+
+	ip link set dev $swp2 down
+	bridge mdb show dev br0 | grep -q "$TEST_GROUP permanent" 2>/dev/null
+	check_err $? "MDB entry did not persist after link up/down"
+
+	bridge mdb del dev br0 port "$swp2" grp $TEST_GROUP 2>/dev/null
+	check_err $? "Failed deleting mdb entry"
+
+	ip link set dev $swp2 up
+	setup_wait_dev $swp2
+	mcast_packet_test $TEST_GROUP_MAC 192.0.2.1 $TEST_GROUP $h1 $h2
+	check_err $? "Traffic to $TEST_GROUP was forwarded after entry removed"
+
+	log_test "MDB add/del entry to port with state down "
+}
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+switch_create()
+{
+	# Enable multicast filtering
+	ip link add dev br0 type bridge mcast_snooping 1 mcast_querier 1
+
+	ip link set dev $swp1 master br0
+	ip link set dev $swp2 master br0
+
+	ip link set dev br0 up
+	ip link set dev $swp1 up
+
+	bridge link set dev $swp2 mcast_flood off
+	# Bridge currently has a "grace time" at creation time before it
+	# forwards multicast according to the mdb. Since we disable the
+	# mcast_flood setting per port
+	sleep 10
+}
+
+switch_destroy()
+{
+	ip link set dev $swp1 down
+	ip link set dev $swp2 down
+	ip link del dev br0
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+	h1_destroy
+	h2_destroy
+
+	vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+tests_run
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh
new file mode 100755
index 000000000000..e2b9ff773c6b
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh
@@ -0,0 +1,564 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test \
+	   mldv2inc_to_exclude_test mldv2exc_allow_test mldv2exc_is_include_test \
+	   mldv2exc_is_exclude_test mldv2exc_to_exclude_test mldv2inc_block_test \
+	   mldv2exc_block_test mldv2exc_timeout_test mldv2star_ex_auto_add_test"
+NUM_NETIFS=4
+CHECK_TC="yes"
+TEST_GROUP="ff02::cc"
+TEST_GROUP_MAC="33:33:00:00:00:cc"
+
+# MLDv2 is_in report: grp ff02::cc is_include 2001:db8:1::1,2001:db8:1::2,2001:db8:1::3
+MZPKT_IS_INC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:\
+00:00:00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:\
+00:05:02:00:00:00:00:8f:00:8e:d9:00:00:00:01:01:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:\
+00:00:00:00:cc:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:\
+00:00:00:00:00:00:02:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:03"
+# MLDv2 is_in report: grp ff02::cc is_include 2001:db8:1::10,2001:db8:1::11,2001:db8:1::12
+MZPKT_IS_INC2="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:\
+00:00:00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:\
+05:02:00:00:00:00:8f:00:8e:ac:00:00:00:01:01:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:\
+00:00:cc:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:10:20:01:0d:b8:00:01:00:00:00:00:00:00:\
+00:00:00:11:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:12"
+# MLDv2 is_in report: grp ff02::cc is_include 2001:db8:1::20,2001:db8:1::30
+MZPKT_IS_INC3="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:44:00:01:fe:80:00:00:00:\
+00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\
+00:00:00:8f:00:bc:5a:00:00:00:01:01:00:00:02:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\
+01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:30"
+# MLDv2 allow report: grp ff02::cc allow 2001:db8:1::10,2001:db8:1::11,2001:db8:1::12
+MZPKT_ALLOW="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:00:\
+00:00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:\
+02:00:00:00:00:8f:00:8a:ac:00:00:00:01:05:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:\
+00:cc:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:10:20:01:0d:b8:00:01:00:00:00:00:00:00:00:\
+00:00:11:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:12"
+# MLDv2 allow report: grp ff02::cc allow 2001:db8:1::20,2001:db8:1::30
+MZPKT_ALLOW2="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:44:00:01:fe:80:00:00:00:\
+00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\
+00:00:00:8f:00:b8:5a:00:00:00:01:05:00:00:02:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\
+01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:30"
+# MLDv2 is_ex report: grp ff02::cc is_exclude 2001:db8:1::1,2001:db8:1::2,2001:db8:1::20,2001:db8:1::21
+MZPKT_IS_EXC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:64:00:01:fe:80:00:00:00:\
+00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\
+00:00:00:8f:00:5f:d0:00:00:00:01:02:00:00:04:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\
+01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:02:20:\
+01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:21"
+# MLDv2 is_ex report: grp ff02::cc is_exclude 2001:db8:1::20,2001:db8:1::30
+MZPKT_IS_EXC2="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:44:00:01:fe:80:00:00:00:\
+00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\
+00:00:00:8f:00:bb:5a:00:00:00:01:02:00:00:02:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\
+01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:30"
+# MLDv2 to_ex report: grp ff02::cc to_exclude 2001:db8:1::1,2001:db8:1::20,2001:db8:1::30
+MZPKT_TO_EXC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:00:00:\
+00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\
+00:00:00:8f:00:8b:8e:00:00:00:01:04:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\
+01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:\
+01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:30"
+# MLDv2 block report: grp ff02::cc block 2001:db8:1::1,2001:db8:1::20,2001:db8:1::30
+MZPKT_BLOCK="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:00:00:00:\
+00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:00:\
+00:00:8f:00:89:8e:00:00:00:01:06:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:01:\
+0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:\
+0d:b8:00:01:00:00:00:00:00:00:00:00:00:30"
+
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 2001:db8:1::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 2001:db8:1::2/64
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2 2001:db8:1::2/64
+}
+
+switch_create()
+{
+	ip link add dev br0 type bridge mcast_snooping 1 mcast_query_response_interval 100 \
+					mcast_mld_version 2 mcast_startup_query_interval 300 \
+					mcast_querier 1
+
+	ip link set dev $swp1 master br0
+	ip link set dev $swp2 master br0
+
+	ip link set dev br0 up
+	ip link set dev $swp1 up
+	ip link set dev $swp2 up
+
+	# make sure a query has been generated
+	sleep 5
+}
+
+switch_destroy()
+{
+	ip link set dev $swp2 down
+	ip link set dev $swp1 down
+
+	ip link del dev br0
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+mldv2include_prepare()
+{
+	local host1_if=$1
+	local X=("2001:db8:1::1" "2001:db8:1::2" "2001:db8:1::3")
+
+	ip link set dev br0 type bridge mcast_mld_version 2
+	check_err $? "Could not change bridge MLD version to 2"
+
+	$MZ $host1_if $MZPKT_IS_INC -q
+	sleep 1
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and .source_list != null)" &>/dev/null
+	check_err $? "Missing *,G entry with source list"
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and .filter_mode == \"include\")" &>/dev/null
+	check_err $? "Wrong *,G entry filter mode"
+	brmcast_check_sg_entries "is_include" "${X[@]}"
+}
+
+mldv2exclude_prepare()
+{
+	local host1_if=$1
+	local mac=$2
+	local group=$3
+	local pkt=$4
+	local X=("2001:db8:1::1" "2001:db8:1::2")
+	local Y=("2001:db8:1::20" "2001:db8:1::21")
+
+	mldv2include_prepare $h1
+
+	$MZ $host1_if -c 1 $MZPKT_IS_EXC -q
+	sleep 1
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+			 .source_list != null and .filter_mode == \"exclude\")" &>/dev/null
+	check_err $? "Wrong *,G entry filter mode"
+
+	brmcast_check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}"
+
+	brmcast_check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 1 "${Y[@]}"
+
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and
+				.source_list[].address == \"2001:db8:1::3\")" &>/dev/null
+	check_fail $? "Wrong *,G entry source list, 2001:db8:1::3 entry still exists"
+}
+
+mldv2cleanup()
+{
+	local port=$1
+
+	bridge mdb del dev br0 port $port grp $TEST_GROUP
+	ip link set dev br0 type bridge mcast_mld_version 1
+}
+
+mldv2include_test()
+{
+	RET=0
+	local X=("2001:db8:1::1" "2001:db8:1::2" "2001:db8:1::3")
+
+	mldv2include_prepare $h1
+
+	brmcast_check_sg_state 0 "${X[@]}"
+
+	brmcast_check_sg_fwding 1 "${X[@]}"
+	brmcast_check_sg_fwding 0 "2001:db8:1::100"
+
+	log_test "MLDv2 report $TEST_GROUP is_include"
+
+	mldv2cleanup $swp1
+}
+
+mldv2inc_allow_test()
+{
+	RET=0
+	local X=("2001:db8:1::10" "2001:db8:1::11" "2001:db8:1::12")
+
+	mldv2include_prepare $h1
+
+	$MZ $h1 -c 1 $MZPKT_ALLOW -q
+	sleep 1
+	brmcast_check_sg_entries "allow" "${X[@]}"
+
+	brmcast_check_sg_state 0 "${X[@]}"
+
+	brmcast_check_sg_fwding 1 "${X[@]}"
+	brmcast_check_sg_fwding 0 "2001:db8:1::100"
+
+	log_test "MLDv2 report $TEST_GROUP include -> allow"
+
+	mldv2cleanup $swp1
+}
+
+mldv2inc_is_include_test()
+{
+	RET=0
+	local X=("2001:db8:1::10" "2001:db8:1::11" "2001:db8:1::12")
+
+	mldv2include_prepare $h1
+
+	$MZ $h1 -c 1 $MZPKT_IS_INC2 -q
+	sleep 1
+	brmcast_check_sg_entries "is_include" "${X[@]}"
+
+	brmcast_check_sg_state 0 "${X[@]}"
+
+	brmcast_check_sg_fwding 1 "${X[@]}"
+	brmcast_check_sg_fwding 0 "2001:db8:1::100"
+
+	log_test "MLDv2 report $TEST_GROUP include -> is_include"
+
+	mldv2cleanup $swp1
+}
+
+mldv2inc_is_exclude_test()
+{
+	RET=0
+
+	mldv2exclude_prepare $h1
+
+	brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100
+	brmcast_check_sg_fwding 0 "${Y[@]}"
+
+	log_test "MLDv2 report $TEST_GROUP include -> is_exclude"
+
+	mldv2cleanup $swp1
+}
+
+mldv2inc_to_exclude_test()
+{
+	RET=0
+	local X=("2001:db8:1::1")
+	local Y=("2001:db8:1::20" "2001:db8:1::30")
+
+	mldv2include_prepare $h1
+
+	ip link set dev br0 type bridge mcast_last_member_interval 500
+	check_err $? "Could not change mcast_last_member_interval to 5s"
+
+	$MZ $h1 -c 1 $MZPKT_TO_EXC -q
+	sleep 1
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and .filter_mode == \"exclude\")" &>/dev/null
+	check_err $? "Wrong *,G entry filter mode"
+
+	brmcast_check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}"
+
+	brmcast_check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 1 "${Y[@]}"
+
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and
+				.source_list[].address == \"2001:db8:1::2\")" &>/dev/null
+	check_fail $? "Wrong *,G entry source list, 2001:db8:1::2 entry still exists"
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and
+				.source_list[].address == \"2001:db8:1::21\")" &>/dev/null
+	check_fail $? "Wrong *,G entry source list, 2001:db8:1::21 entry still exists"
+
+	brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100
+	brmcast_check_sg_fwding 0 "${Y[@]}"
+
+	log_test "MLDv2 report $TEST_GROUP include -> to_exclude"
+
+	ip link set dev br0 type bridge mcast_last_member_interval 100
+
+	mldv2cleanup $swp1
+}
+
+mldv2exc_allow_test()
+{
+	RET=0
+	local X=("2001:db8:1::1" "2001:db8:1::2" "2001:db8:1::20" "2001:db8:1::30")
+	local Y=("2001:db8:1::21")
+
+	mldv2exclude_prepare $h1
+
+	$MZ $h1 -c 1 $MZPKT_ALLOW2 -q
+	sleep 1
+	brmcast_check_sg_entries "allow" "${X[@]}" "${Y[@]}"
+
+	brmcast_check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 1 "${Y[@]}"
+
+	brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100
+	brmcast_check_sg_fwding 0 "${Y[@]}"
+
+	log_test "MLDv2 report $TEST_GROUP exclude -> allow"
+
+	mldv2cleanup $swp1
+}
+
+mldv2exc_is_include_test()
+{
+	RET=0
+	local X=("2001:db8:1::1" "2001:db8:1::2" "2001:db8:1::20" "2001:db8:1::30")
+	local Y=("2001:db8:1::21")
+
+	mldv2exclude_prepare $h1
+
+	$MZ $h1 -c 1 $MZPKT_IS_INC3 -q
+	sleep 1
+	brmcast_check_sg_entries "is_include" "${X[@]}" "${Y[@]}"
+
+	brmcast_check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 1 "${Y[@]}"
+
+	brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100
+	brmcast_check_sg_fwding 0 "${Y[@]}"
+
+	log_test "MLDv2 report $TEST_GROUP exclude -> is_include"
+
+	mldv2cleanup $swp1
+}
+
+mldv2exc_is_exclude_test()
+{
+	RET=0
+	local X=("2001:db8:1::30")
+	local Y=("2001:db8:1::20")
+
+	mldv2exclude_prepare $h1
+
+	$MZ $h1 -c 1 $MZPKT_IS_EXC2 -q
+	sleep 1
+	brmcast_check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}"
+
+	brmcast_check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 1 "${Y[@]}"
+
+	brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100
+	brmcast_check_sg_fwding 0 "${Y[@]}"
+
+	log_test "MLDv2 report $TEST_GROUP exclude -> is_exclude"
+
+	mldv2cleanup $swp1
+}
+
+mldv2exc_to_exclude_test()
+{
+	RET=0
+	local X=("2001:db8:1::1" "2001:db8:1::30")
+	local Y=("2001:db8:1::20")
+
+	mldv2exclude_prepare $h1
+
+	ip link set dev br0 type bridge mcast_last_member_interval 500
+	check_err $? "Could not change mcast_last_member_interval to 5s"
+
+	$MZ $h1 -c 1 $MZPKT_TO_EXC -q
+	sleep 1
+	brmcast_check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}"
+
+	brmcast_check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 1 "${Y[@]}"
+
+	brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100
+	brmcast_check_sg_fwding 0 "${Y[@]}"
+
+	log_test "MLDv2 report $TEST_GROUP exclude -> to_exclude"
+
+	ip link set dev br0 type bridge mcast_last_member_interval 100
+
+	mldv2cleanup $swp1
+}
+
+mldv2inc_block_test()
+{
+	RET=0
+	local X=("2001:db8:1::2" "2001:db8:1::3")
+
+	mldv2include_prepare $h1
+
+	$MZ $h1 -c 1 $MZPKT_BLOCK -q
+	# make sure the lowered timers have expired (by default 2 seconds)
+	sleep 3
+	brmcast_check_sg_entries "block" "${X[@]}"
+
+	brmcast_check_sg_state 0 "${X[@]}"
+
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and
+				.source_list[].address == \"2001:db8:1::1\")" &>/dev/null
+	check_fail $? "Wrong *,G entry source list, 2001:db8:1::1 entry still exists"
+
+	brmcast_check_sg_fwding 1 "${X[@]}"
+	brmcast_check_sg_fwding 0 2001:db8:1::100
+
+	log_test "MLDv2 report $TEST_GROUP include -> block"
+
+	mldv2cleanup $swp1
+}
+
+mldv2exc_block_test()
+{
+	RET=0
+	local X=("2001:db8:1::1" "2001:db8:1::2" "2001:db8:1::30")
+	local Y=("2001:db8:1::20" "2001:db8:1::21")
+
+	mldv2exclude_prepare $h1
+
+	ip link set dev br0 type bridge mcast_last_member_interval 500
+	check_err $? "Could not change mcast_last_member_interval to 5s"
+
+	$MZ $h1 -c 1 $MZPKT_BLOCK -q
+	sleep 1
+	brmcast_check_sg_entries "block" "${X[@]}" "${Y[@]}"
+
+	brmcast_check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 1 "${Y[@]}"
+
+	brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100
+	brmcast_check_sg_fwding 0 "${Y[@]}"
+
+	log_test "MLDv2 report $TEST_GROUP exclude -> block"
+
+	ip link set dev br0 type bridge mcast_last_member_interval 100
+
+	mldv2cleanup $swp1
+}
+
+mldv2exc_timeout_test()
+{
+	RET=0
+	local X=("2001:db8:1::20" "2001:db8:1::30")
+
+	# GMI should be 3 seconds
+	ip link set dev br0 type bridge mcast_query_interval 100 \
+					mcast_query_response_interval 100 \
+					mcast_membership_interval 300
+
+	mldv2exclude_prepare $h1
+	ip link set dev br0 type bridge mcast_query_interval 500 \
+					mcast_query_response_interval 500 \
+					mcast_membership_interval 1500
+
+	$MZ $h1 -c 1 $MZPKT_ALLOW2 -q
+	sleep 3
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and .filter_mode == \"include\")" &>/dev/null
+	check_err $? "Wrong *,G entry filter mode"
+
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and
+				.source_list[].address == \"2001:db8:1::1\")" &>/dev/null
+	check_fail $? "Wrong *,G entry source list, 2001:db8:1::1 entry still exists"
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and
+				.source_list[].address == \"2001:db8:1::2\")" &>/dev/null
+	check_fail $? "Wrong *,G entry source list, 2001:db8:1::2 entry still exists"
+
+	brmcast_check_sg_entries "allow" "${X[@]}"
+
+	brmcast_check_sg_state 0 "${X[@]}"
+
+	brmcast_check_sg_fwding 1 "${X[@]}"
+	brmcast_check_sg_fwding 0 2001:db8:1::100
+
+	log_test "MLDv2 group $TEST_GROUP exclude timeout"
+
+	ip link set dev br0 type bridge mcast_query_interval 12500 \
+					mcast_query_response_interval 1000 \
+					mcast_membership_interval 26000
+
+	mldv2cleanup $swp1
+}
+
+mldv2star_ex_auto_add_test()
+{
+	RET=0
+
+	mldv2exclude_prepare $h1
+
+	$MZ $h2 -c 1 $MZPKT_IS_INC -q
+	sleep 1
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and .src == \"2001:db8:1::3\" and \
+				.port == \"$swp1\")" &>/dev/null
+	check_err $? "S,G entry for *,G port doesn't exist"
+
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and .src == \"2001:db8:1::3\" and \
+				.port == \"$swp1\" and \
+				.flags[] == \"added_by_star_ex\")" &>/dev/null
+	check_err $? "Auto-added S,G entry doesn't have added_by_star_ex flag"
+
+	brmcast_check_sg_fwding 1 2001:db8:1::3
+
+	log_test "MLDv2 S,G port entry automatic add to a *,G port"
+
+	mldv2cleanup $swp1
+	mldv2cleanup $swp2
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
index b90dff8d3a94..64bd00fe9a4f 100755
--- a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
@@ -28,8 +28,9 @@ h2_destroy()
 
 switch_create()
 {
-	# 10 Seconds ageing time.
-	ip link add dev br0 type bridge vlan_filtering 1 ageing_time 1000 \
+	ip link add dev br0 type bridge \
+		vlan_filtering 1 \
+		ageing_time $LOW_AGEING_TIME \
 		mcast_snooping 0
 
 	ip link set dev $swp1 master br0
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh
new file mode 100755
index 000000000000..72dfbeaf56b9
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh
@@ -0,0 +1,546 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="vlmc_control_test vlmc_querier_test vlmc_igmp_mld_version_test \
+	   vlmc_last_member_test vlmc_startup_query_test vlmc_membership_test \
+	   vlmc_querier_intvl_test vlmc_query_intvl_test vlmc_query_response_intvl_test \
+	   vlmc_router_port_test vlmc_filtering_test"
+NUM_NETIFS=4
+CHECK_TC="yes"
+TEST_GROUP="239.10.10.10"
+
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+	ip link add l $h1 $h1.10 up type vlan id 10
+}
+
+h1_destroy()
+{
+	ip link del $h1.10
+	simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64
+	ip link add l $h2 $h2.10 up type vlan id 10
+}
+
+h2_destroy()
+{
+	ip link del $h2.10
+	simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+switch_create()
+{
+	ip link add dev br0 type bridge mcast_snooping 1 mcast_querier 1 vlan_filtering 1
+
+	ip link set dev $swp1 master br0
+	ip link set dev $swp2 master br0
+
+	ip link set dev br0 up
+	ip link set dev $swp1 up
+	ip link set dev $swp2 up
+
+	tc qdisc add dev $swp1 clsact
+	tc qdisc add dev $swp2 clsact
+
+	bridge vlan add vid 10-11 dev $swp1 master
+	bridge vlan add vid 10-11 dev $swp2 master
+
+	ip link set dev br0 type bridge mcast_vlan_snooping 1
+	check_err $? "Could not enable global vlan multicast snooping"
+	log_test "Vlan multicast snooping enable"
+}
+
+switch_destroy()
+{
+	tc qdisc del dev $swp2 clsact
+	tc qdisc del dev $swp1 clsact
+
+	ip link set dev $swp2 down
+	ip link set dev $swp1 down
+
+	ip link del dev br0
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+vlmc_v2join_test()
+{
+	local expect=$1
+
+	RET=0
+	ip address add dev $h2.10 $TEST_GROUP/32 autojoin
+	check_err $? "Could not join $TEST_GROUP"
+
+	sleep 5
+	bridge -j mdb show dev br0 |
+		jq -e ".[].mdb[] | select(.grp == \"$TEST_GROUP\" and .vid == 10)" &>/dev/null
+	if [ $expect -eq 0 ]; then
+		check_err $? "IGMPv2 report didn't create mdb entry for $TEST_GROUP"
+	else
+		check_fail $? "IGMPv2 report shouldn't have created mdb entry for $TEST_GROUP"
+	fi
+
+	# check if we need to cleanup
+	if [ $RET -eq 0 ]; then
+		ip address del dev $h2.10 $TEST_GROUP/32 2>&1 1>/dev/null
+		sleep 5
+		bridge -j mdb show dev br0 |
+			jq -e ".[].mdb[] | select(.grp == \"$TEST_GROUP\" and \
+						  .vid == 10)" &>/dev/null
+		check_fail $? "IGMPv2 leave didn't remove mdb entry for $TEST_GROUP"
+	fi
+}
+
+vlmc_control_test()
+{
+	RET=0
+	local goutput=`bridge -j vlan global show`
+	echo -n $goutput |
+		jq -e ".[].vlans[] | select(.vlan == 10)" &>/dev/null
+	check_err $? "Could not find vlan 10's global options"
+	log_test "Vlan global options existence"
+
+	RET=0
+	echo -n $goutput |
+		jq -e ".[].vlans[] | select(.vlan == 10 and .mcast_snooping == 1) " &>/dev/null
+	check_err $? "Wrong default mcast_snooping global option value"
+	log_test "Vlan mcast_snooping global option default value"
+
+	RET=0
+	vlmc_v2join_test 0
+	bridge vlan global set vid 10 dev br0 mcast_snooping 0
+	check_err $? "Could not disable multicast snooping in vlan 10"
+	vlmc_v2join_test 1
+	log_test "Vlan 10 multicast snooping control"
+}
+
+# setup for general query counting
+vlmc_query_cnt_xstats()
+{
+	local type=$1
+	local version=$2
+	local dev=$3
+
+	ip -j link xstats type bridge_slave dev $dev | \
+	jq -e ".[].multicast.${type}_queries.tx_v${version}"
+}
+
+vlmc_query_cnt_setup()
+{
+	local type=$1
+	local dev=$2
+
+	if [[ $type == "igmp" ]]; then
+		tc filter add dev $dev egress pref 10 prot 802.1Q \
+			flower vlan_id 10 vlan_ethtype ipv4 dst_ip 224.0.0.1 ip_proto 2 \
+			action pass
+	else
+		tc filter add dev $dev egress pref 10 prot 802.1Q \
+			flower vlan_id 10 vlan_ethtype ipv6 dst_ip ff02::1 ip_proto icmpv6 \
+			action pass
+	fi
+
+	ip link set dev br0 type bridge mcast_stats_enabled 1
+}
+
+vlmc_query_cnt_cleanup()
+{
+	local dev=$1
+
+	ip link set dev br0 type bridge mcast_stats_enabled 0
+	tc filter del dev $dev egress pref 10
+}
+
+vlmc_check_query()
+{
+	local type=$1
+	local version=$2
+	local dev=$3
+	local expect=$4
+	local time=$5
+	local ret=0
+
+	vlmc_query_cnt_setup $type $dev
+
+	local pre_tx_xstats=$(vlmc_query_cnt_xstats $type $version $dev)
+	bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_querier 1
+	ret=$?
+	if [[ $ret -eq 0 ]]; then
+		sleep $time
+
+		local tcstats=$(tc_rule_stats_get $dev 10 egress)
+		local post_tx_xstats=$(vlmc_query_cnt_xstats $type $version $dev)
+
+		if [[ $tcstats != $expect || \
+		      $(($post_tx_xstats-$pre_tx_xstats)) != $expect || \
+		      $tcstats != $(($post_tx_xstats-$pre_tx_xstats)) ]]; then
+			ret=1
+		fi
+	fi
+
+	bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_querier 0
+	vlmc_query_cnt_cleanup $dev
+
+	return $ret
+}
+
+vlmc_querier_test()
+{
+	RET=0
+	local goutput=`bridge -j vlan global show`
+	echo -n $goutput |
+		jq -e ".[].vlans[] | select(.vlan == 10)" &>/dev/null
+	check_err $? "Could not find vlan 10's global options"
+
+	echo -n $goutput |
+		jq -e ".[].vlans[] | select(.vlan == 10 and .mcast_querier == 0) " &>/dev/null
+	check_err $? "Wrong default mcast_querier global vlan option value"
+	log_test "Vlan mcast_querier global option default value"
+
+	RET=0
+	bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_querier 1
+	check_err $? "Could not enable querier in vlan 10"
+	log_test "Vlan 10 multicast querier enable"
+	bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_querier 0
+
+	RET=0
+	vlmc_check_query igmp 2 $swp1 1 1
+	check_err $? "No vlan tagged IGMPv2 general query packets sent"
+	log_test "Vlan 10 tagged IGMPv2 general query sent"
+
+	RET=0
+	vlmc_check_query mld 1 $swp1 1 1
+	check_err $? "No vlan tagged MLD general query packets sent"
+	log_test "Vlan 10 tagged MLD general query sent"
+}
+
+vlmc_igmp_mld_version_test()
+{
+	RET=0
+	local goutput=`bridge -j vlan global show`
+	echo -n $goutput |
+		jq -e ".[].vlans[] | select(.vlan == 10)" &>/dev/null
+	check_err $? "Could not find vlan 10's global options"
+
+	echo -n $goutput |
+		jq -e ".[].vlans[] | select(.vlan == 10 and .mcast_igmp_version == 2) " &>/dev/null
+	check_err $? "Wrong default mcast_igmp_version global vlan option value"
+	log_test "Vlan mcast_igmp_version global option default value"
+
+	RET=0
+	echo -n $goutput |
+		jq -e ".[].vlans[] | select(.vlan == 10 and .mcast_mld_version == 1) " &>/dev/null
+	check_err $? "Wrong default mcast_mld_version global vlan option value"
+	log_test "Vlan mcast_mld_version global option default value"
+
+	RET=0
+	bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_igmp_version 3
+	check_err $? "Could not set mcast_igmp_version in vlan 10"
+	log_test "Vlan 10 mcast_igmp_version option changed to 3"
+
+	RET=0
+	vlmc_check_query igmp 3 $swp1 1 1
+	check_err $? "No vlan tagged IGMPv3 general query packets sent"
+	log_test "Vlan 10 tagged IGMPv3 general query sent"
+
+	RET=0
+	bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_mld_version 2
+	check_err $? "Could not set mcast_mld_version in vlan 10"
+	log_test "Vlan 10 mcast_mld_version option changed to 2"
+
+	RET=0
+	vlmc_check_query mld 2 $swp1 1 1
+	check_err $? "No vlan tagged MLDv2 general query packets sent"
+	log_test "Vlan 10 tagged MLDv2 general query sent"
+
+	bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_igmp_version 2
+	bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_mld_version 1
+}
+
+vlmc_last_member_test()
+{
+	RET=0
+	local goutput=`bridge -j vlan global show`
+	echo -n $goutput |
+		jq -e ".[].vlans[] | select(.vlan == 10)" &>/dev/null
+	check_err $? "Could not find vlan 10's global options"
+
+	echo -n $goutput |
+		jq -e ".[].vlans[] | select(.vlan == 10 and \
+					    .mcast_last_member_count == 2) " &>/dev/null
+	check_err $? "Wrong default mcast_last_member_count global vlan option value"
+	log_test "Vlan mcast_last_member_count global option default value"
+
+	RET=0
+	echo -n $goutput |
+		jq -e ".[].vlans[] | select(.vlan == 10 and \
+					    .mcast_last_member_interval == 100) " &>/dev/null
+	check_err $? "Wrong default mcast_last_member_interval global vlan option value"
+	log_test "Vlan mcast_last_member_interval global option default value"
+
+	RET=0
+	bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_last_member_count 3
+	check_err $? "Could not set mcast_last_member_count in vlan 10"
+	log_test "Vlan 10 mcast_last_member_count option changed to 3"
+	bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_last_member_count 2
+
+	RET=0
+	bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_last_member_interval 200
+	check_err $? "Could not set mcast_last_member_interval in vlan 10"
+	log_test "Vlan 10 mcast_last_member_interval option changed to 200"
+	bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_last_member_interval 100
+}
+
+vlmc_startup_query_test()
+{
+	RET=0
+	local goutput=`bridge -j vlan global show`
+	echo -n $goutput |
+		jq -e ".[].vlans[] | select(.vlan == 10)" &>/dev/null
+	check_err $? "Could not find vlan 10's global options"
+
+	echo -n $goutput |
+		jq -e ".[].vlans[] | select(.vlan == 10 and \
+					    .mcast_startup_query_interval == 3125) " &>/dev/null
+	check_err $? "Wrong default mcast_startup_query_interval global vlan option value"
+	log_test "Vlan mcast_startup_query_interval global option default value"
+
+	RET=0
+	echo -n $goutput |
+		jq -e ".[].vlans[] | select(.vlan == 10 and \
+					    .mcast_startup_query_count == 2) " &>/dev/null
+	check_err $? "Wrong default mcast_startup_query_count global vlan option value"
+	log_test "Vlan mcast_startup_query_count global option default value"
+
+	RET=0
+	bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_startup_query_interval 100
+	check_err $? "Could not set mcast_startup_query_interval in vlan 10"
+	vlmc_check_query igmp 2 $swp1 2 3
+	check_err $? "Wrong number of tagged IGMPv2 general queries sent"
+	log_test "Vlan 10 mcast_startup_query_interval option changed to 100"
+
+	RET=0
+	bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_startup_query_count 3
+	check_err $? "Could not set mcast_startup_query_count in vlan 10"
+	vlmc_check_query igmp 2 $swp1 3 4
+	check_err $? "Wrong number of tagged IGMPv2 general queries sent"
+	log_test "Vlan 10 mcast_startup_query_count option changed to 3"
+
+	bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_startup_query_interval 3125
+	bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_startup_query_count 2
+}
+
+vlmc_membership_test()
+{
+	RET=0
+	local goutput=`bridge -j vlan global show`
+	echo -n $goutput |
+		jq -e ".[].vlans[] | select(.vlan == 10)" &>/dev/null
+	check_err $? "Could not find vlan 10's global options"
+
+	echo -n $goutput |
+		jq -e ".[].vlans[] | select(.vlan == 10 and \
+					    .mcast_membership_interval == 26000) " &>/dev/null
+	check_err $? "Wrong default mcast_membership_interval global vlan option value"
+	log_test "Vlan mcast_membership_interval global option default value"
+
+	RET=0
+	bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_membership_interval 200
+	check_err $? "Could not set mcast_membership_interval in vlan 10"
+	log_test "Vlan 10 mcast_membership_interval option changed to 200"
+
+	RET=0
+	vlmc_v2join_test 1
+	log_test "Vlan 10 mcast_membership_interval mdb entry expire"
+
+	bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_membership_interval 26000
+}
+
+vlmc_querier_intvl_test()
+{
+	RET=0
+	local goutput=`bridge -j vlan global show`
+	echo -n $goutput |
+		jq -e ".[].vlans[] | select(.vlan == 10)" &>/dev/null
+	check_err $? "Could not find vlan 10's global options"
+
+	echo -n $goutput |
+		jq -e ".[].vlans[] | select(.vlan == 10 and \
+					    .mcast_querier_interval == 25500) " &>/dev/null
+	check_err $? "Wrong default mcast_querier_interval global vlan option value"
+	log_test "Vlan mcast_querier_interval global option default value"
+
+	RET=0
+	bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_querier_interval 100
+	check_err $? "Could not set mcast_querier_interval in vlan 10"
+	log_test "Vlan 10 mcast_querier_interval option changed to 100"
+
+	RET=0
+	ip link add dev br1 type bridge mcast_snooping 1 mcast_querier 1 vlan_filtering 1 \
+					mcast_vlan_snooping 1
+	bridge vlan add vid 10 dev br1 self pvid untagged
+	ip link set dev $h1 master br1
+	ip link set dev br1 up
+	bridge vlan add vid 10 dev $h1 master
+	bridge vlan global set vid 10 dev br1 mcast_snooping 1 mcast_querier 1
+	sleep 2
+	ip link del dev br1
+	ip addr replace 2001:db8:1::1/64 dev $h1
+	vlmc_check_query igmp 2 $swp1 1 1
+	check_err $? "Wrong number of IGMPv2 general queries after querier interval"
+	log_test "Vlan 10 mcast_querier_interval expire after outside query"
+
+	bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_querier_interval 25500
+}
+
+vlmc_query_intvl_test()
+{
+	RET=0
+	local goutput=`bridge -j vlan global show`
+	echo -n $goutput |
+		jq -e ".[].vlans[] | select(.vlan == 10)" &>/dev/null
+	check_err $? "Could not find vlan 10's global options"
+
+	echo -n $goutput |
+		jq -e ".[].vlans[] | select(.vlan == 10 and \
+					    .mcast_query_interval == 12500) " &>/dev/null
+	check_err $? "Wrong default mcast_query_interval global vlan option value"
+	log_test "Vlan mcast_query_interval global option default value"
+
+	RET=0
+	bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_startup_query_count 0
+	bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_query_interval 200
+	check_err $? "Could not set mcast_query_interval in vlan 10"
+	# 1 is sent immediately, then 2 more in the next 5 seconds
+	vlmc_check_query igmp 2 $swp1 3 5
+	check_err $? "Wrong number of tagged IGMPv2 general queries sent"
+	log_test "Vlan 10 mcast_query_interval option changed to 200"
+
+	bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_startup_query_count 2
+	bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_query_interval 12500
+}
+
+vlmc_query_response_intvl_test()
+{
+	RET=0
+	local goutput=`bridge -j vlan global show`
+	echo -n $goutput |
+		jq -e ".[].vlans[] | select(.vlan == 10)" &>/dev/null
+	check_err $? "Could not find vlan 10's global options"
+
+	echo -n $goutput |
+		jq -e ".[].vlans[] | select(.vlan == 10 and \
+					    .mcast_query_response_interval == 1000) " &>/dev/null
+	check_err $? "Wrong default mcast_query_response_interval global vlan option value"
+	log_test "Vlan mcast_query_response_interval global option default value"
+
+	RET=0
+	bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_query_response_interval 200
+	check_err $? "Could not set mcast_query_response_interval in vlan 10"
+	log_test "Vlan 10 mcast_query_response_interval option changed to 200"
+
+	bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_query_response_interval 1000
+}
+
+vlmc_router_port_test()
+{
+	RET=0
+	local goutput=`bridge -j -d vlan show`
+	echo -n $goutput |
+		jq -e ".[] | select(.ifname == \"$swp1\" and \
+				    .vlans[].vlan == 10)" &>/dev/null
+	check_err $? "Could not find port vlan 10's options"
+
+	echo -n $goutput |
+		jq -e ".[] | select(.ifname == \"$swp1\" and \
+				    .vlans[].vlan == 10 and \
+				    .vlans[].mcast_router == 1)" &>/dev/null
+	check_err $? "Wrong default port mcast_router option value"
+	log_test "Port vlan 10 option mcast_router default value"
+
+	RET=0
+	bridge vlan set vid 10 dev $swp1 mcast_router 2
+	check_err $? "Could not set port vlan 10's mcast_router option"
+	log_test "Port vlan 10 mcast_router option changed to 2"
+
+	RET=0
+	tc filter add dev $swp1 egress pref 10 prot 802.1Q \
+		flower vlan_id 10 vlan_ethtype ipv4 dst_ip 239.1.1.1 ip_proto udp action pass
+	tc filter add dev $swp2 egress pref 10 prot 802.1Q \
+		flower vlan_id 10 vlan_ethtype ipv4 dst_ip 239.1.1.1 ip_proto udp action pass
+	bridge vlan set vid 10 dev $swp2 mcast_router 0
+	# we need to enable querier and disable query response interval to
+	# make sure packets are flooded only to router ports
+	bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_querier 1 \
+					      mcast_query_response_interval 0
+	bridge vlan add vid 10 dev br0 self
+	sleep 1
+	mausezahn br0 -Q 10 -c 10 -p 128 -b 01:00:5e:01:01:01 -B 239.1.1.1 \
+			-t udp "dp=1024" &>/dev/null
+	local swp1_tcstats=$(tc_rule_stats_get $swp1 10 egress)
+	if [[ $swp1_tcstats != 10 ]]; then
+		check_err 1 "Wrong number of vlan 10 multicast packets flooded"
+	fi
+	local swp2_tcstats=$(tc_rule_stats_get $swp2 10 egress)
+	check_err $swp2_tcstats "Vlan 10 multicast packets flooded to non-router port"
+	log_test "Flood unknown vlan multicast packets to router port only"
+
+	tc filter del dev $swp2 egress pref 10
+	tc filter del dev $swp1 egress pref 10
+	bridge vlan del vid 10 dev br0 self
+	bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_query_response_interval 1000
+	bridge vlan set vid 10 dev $swp2 mcast_router 1
+	bridge vlan set vid 10 dev $swp1 mcast_router 1
+}
+
+vlmc_filtering_test()
+{
+	RET=0
+	ip link set dev br0 type bridge vlan_filtering 0
+	ip -j -d link show dev br0 | \
+	jq -e "select(.[0].linkinfo.info_data.mcast_vlan_snooping == 1)" &>/dev/null
+	check_fail $? "Vlan filtering is disabled but multicast vlan snooping is still enabled"
+	log_test "Disable multicast vlan snooping when vlan filtering is disabled"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
index c15c6c85c984..1c8a26046589 100755
--- a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
@@ -27,8 +27,9 @@ h2_destroy()
 
 switch_create()
 {
-	# 10 Seconds ageing time.
-	ip link add dev br0 type bridge ageing_time 1000 mcast_snooping 0
+	ip link add dev br0 type bridge \
+		ageing_time $LOW_AGEING_TIME \
+		mcast_snooping 0
 
 	ip link set dev $swp1 master br0
 	ip link set dev $swp2 master br0
diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config
index da96eff72a8e..8d7a1a004b7c 100644
--- a/tools/testing/selftests/net/forwarding/config
+++ b/tools/testing/selftests/net/forwarding/config
@@ -6,9 +6,49 @@ CONFIG_IPV6_MULTIPLE_TABLES=y
 CONFIG_NET_VRF=m
 CONFIG_BPF_SYSCALL=y
 CONFIG_CGROUP_BPF=y
+CONFIG_DUMMY=m
+CONFIG_IPV6=y
+CONFIG_IPV6_GRE=m
+CONFIG_IPV6_MROUTE=y
+CONFIG_IPV6_MROUTE_MULTIPLE_TABLES=y
+CONFIG_IPV6_PIMSM_V2=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_MACVLAN=m
+CONFIG_NET_ACT_CT=m
+CONFIG_NET_ACT_MIRRED=m
+CONFIG_NET_ACT_MPLS=m
+CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_ACT_POLICE=m
+CONFIG_NET_ACT_SAMPLE=m
+CONFIG_NET_ACT_SKBEDIT=m
+CONFIG_NET_ACT_TUNNEL_KEY=m
+CONFIG_NET_ACT_VLAN=m
 CONFIG_NET_CLS_FLOWER=m
+CONFIG_NET_CLS_MATCHALL=m
+CONFIG_NET_CLS_BASIC=m
+CONFIG_NET_EMATCH=y
+CONFIG_NET_EMATCH_META=m
+CONFIG_NET_IPGRE=m
+CONFIG_NET_IPGRE_DEMUX=m
+CONFIG_NET_IPIP=m
+CONFIG_NET_SCH_ETS=m
 CONFIG_NET_SCH_INGRESS=m
 CONFIG_NET_ACT_GACT=m
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_RED=m
+CONFIG_NET_SCH_TBF=m
+CONFIG_NET_TC_SKB_EXT=y
+CONFIG_NET_TEAM=y
+CONFIG_NET_TEAM_MODE_LOADBALANCE=y
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_FLOW_TABLE=m
+CONFIG_NF_TABLES=m
 CONFIG_VETH=m
 CONFIG_NAMESPACES=y
 CONFIG_NET_NS=y
+CONFIG_VXLAN=m
+CONFIG_XFRM_USER=m
diff --git a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh
new file mode 100755
index 000000000000..1783c10215e5
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh
@@ -0,0 +1,372 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test traffic distribution between two paths when using custom hash policy.
+#
+# +--------------------------------+
+# | H1                             |
+# |                     $h1 +      |
+# |   198.51.100.{2-253}/24 |      |
+# |   2001:db8:1::{2-fd}/64 |      |
+# +-------------------------|------+
+#                           |
+# +-------------------------|-------------------------+
+# | SW1                     |                         |
+# |                    $rp1 +                         |
+# |         198.51.100.1/24                           |
+# |        2001:db8:1::1/64                           |
+# |                                                   |
+# |                                                   |
+# |            $rp11 +             + $rp12            |
+# |     192.0.2.1/28 |             | 192.0.2.17/28    |
+# | 2001:db8:2::1/64 |             | 2001:db8:3::1/64 |
+# +------------------|-------------|------------------+
+#                    |             |
+# +------------------|-------------|------------------+
+# | SW2              |             |                  |
+# |                  |             |                  |
+# |            $rp21 +             + $rp22            |
+# |     192.0.2.2/28                 192.0.2.18/28    |
+# | 2001:db8:2::2/64                 2001:db8:3::2/64 |
+# |                                                   |
+# |                                                   |
+# |                    $rp2 +                         |
+# |          203.0.113.1/24 |                         |
+# |        2001:db8:4::1/64 |                         |
+# +-------------------------|-------------------------+
+#                           |
+# +-------------------------|------+
+# | H2                      |      |
+# |                     $h2 +      |
+# |    203.0.113.{2-253}/24        |
+# |   2001:db8:4::{2-fd}/64        |
+# +--------------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	ping_ipv6
+	custom_hash
+"
+
+NUM_NETIFS=8
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 198.51.100.2/24 2001:db8:1::2/64
+	ip route add vrf v$h1 default via 198.51.100.1 dev $h1
+	ip -6 route add vrf v$h1 default via 2001:db8:1::1 dev $h1
+}
+
+h1_destroy()
+{
+	ip -6 route del vrf v$h1 default
+	ip route del vrf v$h1 default
+	simple_if_fini $h1 198.51.100.2/24 2001:db8:1::2/64
+}
+
+sw1_create()
+{
+	simple_if_init $rp1 198.51.100.1/24 2001:db8:1::1/64
+	__simple_if_init $rp11 v$rp1 192.0.2.1/28 2001:db8:2::1/64
+	__simple_if_init $rp12 v$rp1 192.0.2.17/28 2001:db8:3::1/64
+
+	ip route add vrf v$rp1 203.0.113.0/24 \
+		nexthop via 192.0.2.2 dev $rp11 \
+		nexthop via 192.0.2.18 dev $rp12
+
+	ip -6 route add vrf v$rp1 2001:db8:4::/64 \
+		nexthop via 2001:db8:2::2 dev $rp11 \
+		nexthop via 2001:db8:3::2 dev $rp12
+}
+
+sw1_destroy()
+{
+	ip -6 route del vrf v$rp1 2001:db8:4::/64
+
+	ip route del vrf v$rp1 203.0.113.0/24
+
+	__simple_if_fini $rp12 192.0.2.17/28 2001:db8:3::1/64
+	__simple_if_fini $rp11 192.0.2.1/28 2001:db8:2::1/64
+	simple_if_fini $rp1 198.51.100.1/24 2001:db8:1::1/64
+}
+
+sw2_create()
+{
+	simple_if_init $rp2 203.0.113.1/24 2001:db8:4::1/64
+	__simple_if_init $rp21 v$rp2 192.0.2.2/28 2001:db8:2::2/64
+	__simple_if_init $rp22 v$rp2 192.0.2.18/28 2001:db8:3::2/64
+
+	ip route add vrf v$rp2 198.51.100.0/24 \
+		nexthop via 192.0.2.1 dev $rp21 \
+		nexthop via 192.0.2.17 dev $rp22
+
+	ip -6 route add vrf v$rp2 2001:db8:1::/64 \
+		nexthop via 2001:db8:2::1 dev $rp21 \
+		nexthop via 2001:db8:3::1 dev $rp22
+}
+
+sw2_destroy()
+{
+	ip -6 route del vrf v$rp2 2001:db8:1::/64
+
+	ip route del vrf v$rp2 198.51.100.0/24
+
+	__simple_if_fini $rp22 192.0.2.18/28 2001:db8:3::2/64
+	__simple_if_fini $rp21 192.0.2.2/28 2001:db8:2::2/64
+	simple_if_fini $rp2 203.0.113.1/24 2001:db8:4::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 203.0.113.2/24 2001:db8:4::2/64
+	ip route add vrf v$h2 default via 203.0.113.1 dev $h2
+	ip -6 route add vrf v$h2 default via 2001:db8:4::1 dev $h2
+}
+
+h2_destroy()
+{
+	ip -6 route del vrf v$h2 default
+	ip route del vrf v$h2 default
+	simple_if_fini $h2 203.0.113.2/24 2001:db8:4::2/64
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+
+	rp1=${NETIFS[p2]}
+
+	rp11=${NETIFS[p3]}
+	rp21=${NETIFS[p4]}
+
+	rp12=${NETIFS[p5]}
+	rp22=${NETIFS[p6]}
+
+	rp2=${NETIFS[p7]}
+
+	h2=${NETIFS[p8]}
+
+	vrf_prepare
+	h1_create
+	sw1_create
+	sw2_create
+	h2_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	h2_destroy
+	sw2_destroy
+	sw1_destroy
+	h1_destroy
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 203.0.113.2
+}
+
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:4::2
+}
+
+send_src_ipv4()
+{
+	ip vrf exec v$h1 $MZ $h1 -q -p 64 \
+		-A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \
+		-d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_dst_ipv4()
+{
+	ip vrf exec v$h1 $MZ $h1 -q -p 64 \
+		-A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \
+		-d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_src_udp4()
+{
+	ip vrf exec v$h1 $MZ $h1 -q -p 64 \
+		-A 198.51.100.2 -B 203.0.113.2 \
+		-d $MZ_DELAY -t udp "sp=0-32768,dp=30000"
+}
+
+send_dst_udp4()
+{
+	ip vrf exec v$h1 $MZ $h1 -q -p 64 \
+		-A 198.51.100.2 -B 203.0.113.2 \
+		-d $MZ_DELAY -t udp "sp=20000,dp=0-32768"
+}
+
+send_src_ipv6()
+{
+	ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
+		-A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:4::2 \
+		-d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_dst_ipv6()
+{
+	ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
+		-A 2001:db8:1::2 -B "2001:db8:4::2-2001:db8:4::fd" \
+		-d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_flowlabel()
+{
+	# Generate 16384 echo requests, each with a random flow label.
+	for _ in $(seq 1 16384); do
+		ip vrf exec v$h1 \
+			$PING6 2001:db8:4::2 -F 0 -c 1 -q >/dev/null 2>&1
+	done
+}
+
+send_src_udp6()
+{
+	ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
+		-A 2001:db8:1::2 -B 2001:db8:4::2 \
+		-d $MZ_DELAY -t udp "sp=0-32768,dp=30000"
+}
+
+send_dst_udp6()
+{
+	ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
+		-A 2001:db8:1::2 -B 2001:db8:4::2 \
+		-d $MZ_DELAY -t udp "sp=20000,dp=0-32768"
+}
+
+custom_hash_test()
+{
+	local field="$1"; shift
+	local balanced="$1"; shift
+	local send_flows="$@"
+
+	RET=0
+
+	local t0_rp11=$(link_stats_tx_packets_get $rp11)
+	local t0_rp12=$(link_stats_tx_packets_get $rp12)
+
+	$send_flows
+
+	local t1_rp11=$(link_stats_tx_packets_get $rp11)
+	local t1_rp12=$(link_stats_tx_packets_get $rp12)
+
+	local d_rp11=$((t1_rp11 - t0_rp11))
+	local d_rp12=$((t1_rp12 - t0_rp12))
+
+	local diff=$((d_rp12 - d_rp11))
+	local sum=$((d_rp11 + d_rp12))
+
+	local pct=$(echo "$diff / $sum * 100" | bc -l)
+	local is_balanced=$(echo "-20 <= $pct && $pct <= 20" | bc)
+
+	[[ ( $is_balanced -eq 1 && $balanced == "balanced" ) ||
+	   ( $is_balanced -eq 0 && $balanced == "unbalanced" ) ]]
+	check_err $? "Expected traffic to be $balanced, but it is not"
+
+	log_test "Multipath hash field: $field ($balanced)"
+	log_info "Packets sent on path1 / path2: $d_rp11 / $d_rp12"
+}
+
+custom_hash_v4()
+{
+	log_info "Running IPv4 custom multipath hash tests"
+
+	sysctl_set net.ipv4.fib_multipath_hash_policy 3
+
+	# Prevent the neighbour table from overflowing, as different neighbour
+	# entries will be created on $ol4 when using different destination IPs.
+	sysctl_set net.ipv4.neigh.default.gc_thresh1 1024
+	sysctl_set net.ipv4.neigh.default.gc_thresh2 1024
+	sysctl_set net.ipv4.neigh.default.gc_thresh3 1024
+
+	sysctl_set net.ipv4.fib_multipath_hash_fields 0x0001
+	custom_hash_test "Source IP" "balanced" send_src_ipv4
+	custom_hash_test "Source IP" "unbalanced" send_dst_ipv4
+
+	sysctl_set net.ipv4.fib_multipath_hash_fields 0x0002
+	custom_hash_test "Destination IP" "balanced" send_dst_ipv4
+	custom_hash_test "Destination IP" "unbalanced" send_src_ipv4
+
+	sysctl_set net.ipv4.fib_multipath_hash_fields 0x0010
+	custom_hash_test "Source port" "balanced" send_src_udp4
+	custom_hash_test "Source port" "unbalanced" send_dst_udp4
+
+	sysctl_set net.ipv4.fib_multipath_hash_fields 0x0020
+	custom_hash_test "Destination port" "balanced" send_dst_udp4
+	custom_hash_test "Destination port" "unbalanced" send_src_udp4
+
+	sysctl_restore net.ipv4.neigh.default.gc_thresh3
+	sysctl_restore net.ipv4.neigh.default.gc_thresh2
+	sysctl_restore net.ipv4.neigh.default.gc_thresh1
+
+	sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+custom_hash_v6()
+{
+	log_info "Running IPv6 custom multipath hash tests"
+
+	sysctl_set net.ipv6.fib_multipath_hash_policy 3
+
+	# Prevent the neighbour table from overflowing, as different neighbour
+	# entries will be created on $ol4 when using different destination IPs.
+	sysctl_set net.ipv6.neigh.default.gc_thresh1 1024
+	sysctl_set net.ipv6.neigh.default.gc_thresh2 1024
+	sysctl_set net.ipv6.neigh.default.gc_thresh3 1024
+
+	sysctl_set net.ipv6.fib_multipath_hash_fields 0x0001
+	custom_hash_test "Source IP" "balanced" send_src_ipv6
+	custom_hash_test "Source IP" "unbalanced" send_dst_ipv6
+
+	sysctl_set net.ipv6.fib_multipath_hash_fields 0x0002
+	custom_hash_test "Destination IP" "balanced" send_dst_ipv6
+	custom_hash_test "Destination IP" "unbalanced" send_src_ipv6
+
+	sysctl_set net.ipv6.fib_multipath_hash_fields 0x0008
+	custom_hash_test "Flowlabel" "balanced" send_flowlabel
+	custom_hash_test "Flowlabel" "unbalanced" send_src_ipv6
+
+	sysctl_set net.ipv6.fib_multipath_hash_fields 0x0010
+	custom_hash_test "Source port" "balanced" send_src_udp6
+	custom_hash_test "Source port" "unbalanced" send_dst_udp6
+
+	sysctl_set net.ipv6.fib_multipath_hash_fields 0x0020
+	custom_hash_test "Destination port" "balanced" send_dst_udp6
+	custom_hash_test "Destination port" "unbalanced" send_src_udp6
+
+	sysctl_restore net.ipv6.neigh.default.gc_thresh3
+	sysctl_restore net.ipv6.neigh.default.gc_thresh2
+	sysctl_restore net.ipv6.neigh.default.gc_thresh1
+
+	sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+custom_hash()
+{
+	# Test that when the hash policy is set to custom, traffic is
+	# distributed only according to the fields set in the
+	# fib_multipath_hash_fields sysctl.
+	#
+	# Each time set a different field and make sure traffic is only
+	# distributed when the field is changed in the packet stream.
+	custom_hash_v4
+	custom_hash_v6
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh
index 9c12c4fd3afc..f1de525cfa55 100644
--- a/tools/testing/selftests/net/forwarding/devlink_lib.sh
+++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh
@@ -1,6 +1,9 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 ##############################################################################
 # Defines
 
@@ -9,15 +12,21 @@ if [[ ! -v DEVLINK_DEV ]]; then
 			     | jq -r '.port | keys[]' | cut -d/ -f-2)
 	if [ -z "$DEVLINK_DEV" ]; then
 		echo "SKIP: ${NETIFS[p1]} has no devlink device registered for it"
-		exit 1
+		exit $ksft_skip
 	fi
 	if [[ "$(echo $DEVLINK_DEV | grep -c pci)" -eq 0 ]]; then
 		echo "SKIP: devlink device's bus is not PCI"
-		exit 1
+		exit $ksft_skip
 	fi
 
 	DEVLINK_VIDDID=$(lspci -s $(echo $DEVLINK_DEV | cut -d"/" -f2) \
 			 -n | cut -d" " -f3)
+elif [[ ! -z "$DEVLINK_DEV" ]]; then
+	devlink dev show $DEVLINK_DEV &> /dev/null
+	if [ $? -ne 0 ]; then
+		echo "SKIP: devlink device \"$DEVLINK_DEV\" not found"
+		exit $ksft_skip
+	fi
 fi
 
 ##############################################################################
@@ -26,19 +35,19 @@ fi
 devlink help 2>&1 | grep resource &> /dev/null
 if [ $? -ne 0 ]; then
 	echo "SKIP: iproute2 too old, missing devlink resource support"
-	exit 1
+	exit $ksft_skip
 fi
 
 devlink help 2>&1 | grep trap &> /dev/null
 if [ $? -ne 0 ]; then
 	echo "SKIP: iproute2 too old, missing devlink trap support"
-	exit 1
+	exit $ksft_skip
 fi
 
 devlink dev help 2>&1 | grep info &> /dev/null
 if [ $? -ne 0 ]; then
 	echo "SKIP: iproute2 too old, missing devlink dev info support"
-	exit 1
+	exit $ksft_skip
 fi
 
 ##############################################################################
@@ -318,6 +327,14 @@ devlink_trap_rx_bytes_get()
 		| jq '.[][][]["stats"]["rx"]["bytes"]'
 }
 
+devlink_trap_drop_packets_get()
+{
+	local trap_name=$1; shift
+
+	devlink -js trap show $DEVLINK_DEV trap $trap_name \
+		| jq '.[][][]["stats"]["rx"]["dropped"]'
+}
+
 devlink_trap_stats_idle_test()
 {
 	local trap_name=$1; shift
@@ -339,6 +356,24 @@ devlink_trap_stats_idle_test()
 	fi
 }
 
+devlink_trap_drop_stats_idle_test()
+{
+	local trap_name=$1; shift
+	local t0_packets t0_bytes
+
+	t0_packets=$(devlink_trap_drop_packets_get $trap_name)
+
+	sleep 1
+
+	t1_packets=$(devlink_trap_drop_packets_get $trap_name)
+
+	if [[ $t0_packets -eq $t1_packets ]]; then
+		return 0
+	else
+		return 1
+	fi
+}
+
 devlink_traps_enable_all()
 {
 	local trap_name
@@ -468,25 +503,30 @@ devlink_trap_drop_cleanup()
 	tc filter del dev $dev egress protocol $proto pref $pref handle $handle flower
 }
 
-devlink_trap_stats_test()
+devlink_trap_stats_check()
 {
-	local test_name=$1; shift
 	local trap_name=$1; shift
 	local send_one="$@"
 	local t0_packets
 	local t1_packets
 
-	RET=0
-
 	t0_packets=$(devlink_trap_rx_packets_get $trap_name)
 
 	$send_one && sleep 1
 
 	t1_packets=$(devlink_trap_rx_packets_get $trap_name)
 
-	if [[ $t1_packets -eq $t0_packets ]]; then
-		check_err 1 "Trap stats did not increase"
-	fi
+	[[ $t1_packets -ne $t0_packets ]]
+}
+
+devlink_trap_stats_test()
+{
+	local test_name=$1; shift
+
+	RET=0
+
+	devlink_trap_stats_check "$@"
+	check_err $? "Trap stats did not increase"
 
 	log_test "$test_name"
 }
@@ -528,12 +568,6 @@ devlink_trap_group_policer_get()
 		| jq '.[][][]["policer"]'
 }
 
-devlink_trap_policer_ids_get()
-{
-	devlink -j -p trap policer show \
-		| jq '.[]["'$DEVLINK_DEV'"][]["policer"]'
-}
-
 devlink_port_by_netdev()
 {
 	local if_name=$1
@@ -555,3 +589,8 @@ devlink_cell_size_get()
 	devlink sb pool show "$DEVLINK_DEV" pool 0 -j \
 	    | jq '.pool[][].cell_size'
 }
+
+devlink_pool_size_get()
+{
+	devlink sb show "$DEVLINK_DEV" -j | jq '.[][][]["size"]'
+}
diff --git a/tools/testing/selftests/net/forwarding/dual_vxlan_bridge.sh b/tools/testing/selftests/net/forwarding/dual_vxlan_bridge.sh
new file mode 100755
index 000000000000..68ee92df3e07
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/dual_vxlan_bridge.sh
@@ -0,0 +1,367 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +--------------------+                               +----------------------+
+# | H1 (vrf)           |                               |             H2 (vrf) |
+# |    + h1.10         |                               |  + h2.20             |
+# |    | 192.0.2.1/28  |                               |  | 192.0.2.2/28      |
+# |    |               |                               |  |                   |
+# |    + $h1           |                               |  + $h2               |
+# |    |               |                               |  |                   |
+# +----|---------------+                               +--|-------------------+
+#      |                                                  |
+# +----|--------------------------------------------------|--------------------+
+# | SW |                                                  |                    |
+# | +--|-------------------------------+ +----------------|------------------+ |
+# | |  + $swp1         BR1 (802.1ad)   | | BR2 (802.1d)   + $swp2            | |
+# | |    vid 100 pvid untagged         | |                |                  | |
+# | |                                  | |                + $swp2.20         | |
+# | |                                  | |                                   | |
+# | |  + vx100 (vxlan)                 | |  + vx200 (vxlan)                  | |
+# | |    local 192.0.2.17              | |    local 192.0.2.17               | |
+# | |    remote 192.0.2.34             | |    remote 192.0.2.50              | |
+# | |    id 1000 dstport $VXPORT       | |    id 2000 dstport $VXPORT        | |
+# | |    vid 100 pvid untagged         | |                                   | |
+# | +--------------------------------- + +-----------------------------------+ |
+# |                                                                            |
+# |  192.0.2.32/28 via 192.0.2.18                                              |
+# |  192.0.2.48/28 via 192.0.2.18                                              |
+# |                                                                            |
+# |    + $rp1                                                                  |
+# |    | 192.0.2.17/28                                                         |
+# +----|-----------------------------------------------------------------------+
+#      |
+# +----|--------------------------------------------------------+
+# |    |                                             VRP2 (vrf) |
+# |    + $rp2                                                   |
+# |      192.0.2.18/28                                          |
+# |                                                             |   (maybe) HW
+# =============================================================================
+# |                                                             |  (likely) SW
+# |    + v1 (veth)                             + v3 (veth)      |
+# |    | 192.0.2.33/28                         | 192.0.2.49/28  |
+# +----|---------------------------------------|----------------+
+#      |                                       |
+# +----|------------------------------+   +----|------------------------------+
+# |    + v2 (veth)        NS1 (netns) |   |    + v4 (veth)        NS2 (netns) |
+# |      192.0.2.34/28                |   |      192.0.2.50/28                |
+# |                                   |   |                                   |
+# |   192.0.2.16/28 via 192.0.2.33    |   |   192.0.2.16/28 via 192.0.2.49    |
+# |   192.0.2.50/32 via 192.0.2.33    |   |   192.0.2.34/32 via 192.0.2.49    |
+# |                                   |   |                                   |
+# | +-------------------------------+ |   | +-------------------------------+ |
+# | |                 BR3 (802.1ad) | |   | |                  BR3 (802.1d) | |
+# | |  + vx100 (vxlan)              | |   | |  + vx200 (vxlan)              | |
+# | |    local 192.0.2.34           | |   | |    local 192.0.2.50           | |
+# | |    remote 192.0.2.17          | |   | |    remote 192.0.2.17          | |
+# | |    remote 192.0.2.50          | |   | |    remote 192.0.2.34          | |
+# | |    id 1000 dstport $VXPORT    | |   | |    id 2000 dstport $VXPORT    | |
+# | |    vid 100 pvid untagged      | |   | |                               | |
+# | |                               | |   | |  + w1.20                      | |
+# | |                               | |   | |  |                            | |
+# | |  + w1 (veth)                  | |   | |  + w1 (veth)                  | |
+# | |  | vid 100 pvid untagged      | |   | |  |                            | |
+# | +--|----------------------------+ |   | +--|----------------------------+ |
+# |    |                              |   |    |                              |
+# | +--|----------------------------+ |   | +--|----------------------------+ |
+# | |  |                  VW2 (vrf) | |   | |  |                  VW2 (vrf) | |
+# | |  + w2 (veth)                  | |   | |  + w2 (veth)                  | |
+# | |  |                            | |   | |  |                            | |
+# | |  |                            | |   | |  |                            | |
+# | |  + w2.10                      | |   | |  + w2.20                      | |
+# | |    192.0.2.3/28               | |   | |    192.0.2.4/28               | |
+# | +-------------------------------+ |   | +-------------------------------+ |
+# +-----------------------------------+   +-----------------------------------+
+
+: ${VXPORT:=4789}
+export VXPORT
+
+: ${ALL_TESTS:="
+	ping_ipv4
+    "}
+
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1
+	tc qdisc add dev $h1 clsact
+	vlan_create $h1 10 v$h1 192.0.2.1/28
+}
+
+h1_destroy()
+{
+	vlan_destroy $h1 10
+	tc qdisc del dev $h1 clsact
+	simple_if_fini $h1
+}
+
+h2_create()
+{
+	simple_if_init $h2
+	tc qdisc add dev $h2 clsact
+	vlan_create $h2 20 v$h2 192.0.2.2/28
+}
+
+h2_destroy()
+{
+	vlan_destroy $h2 20
+	tc qdisc del dev $h2 clsact
+	simple_if_fini $h2
+}
+
+rp1_set_addr()
+{
+	ip address add dev $rp1 192.0.2.17/28
+
+	ip route add 192.0.2.32/28 nexthop via 192.0.2.18
+	ip route add 192.0.2.48/28 nexthop via 192.0.2.18
+}
+
+rp1_unset_addr()
+{
+	ip route del 192.0.2.48/28 nexthop via 192.0.2.18
+	ip route del 192.0.2.32/28 nexthop via 192.0.2.18
+
+	ip address del dev $rp1 192.0.2.17/28
+}
+
+switch_create()
+{
+	#### BR1 ####
+	ip link add name br1 type bridge vlan_filtering 1 \
+		vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0
+	ip link set dev br1 addrgenmode none
+	# Make sure the bridge uses the MAC address of the local port and not
+	# that of the VxLAN's device.
+	ip link set dev br1 address $(mac_get $swp1)
+	ip link set dev br1 up
+
+	#### BR2 ####
+	ip link add name br2 type bridge vlan_filtering 0 mcast_snooping 0
+	# Make sure the bridge uses the MAC address of the local port and not
+	# that of the VxLAN's device.
+	ip link set dev br2 address $(mac_get $swp2)
+	ip link set dev br2 up
+
+	ip link set dev $rp1 up
+	rp1_set_addr
+
+	#### VX100 ####
+	ip link add name vx100 type vxlan id 1000 local 192.0.2.17 \
+		dstport "$VXPORT" nolearning noudpcsum tos inherit ttl 100
+	ip link set dev vx100 up
+
+	ip link set dev vx100 master br1
+	bridge vlan add vid 100 dev vx100 pvid untagged
+
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+	bridge vlan add vid 100 dev $swp1 pvid untagged
+
+	#### VX200 ####
+	ip link add name vx200 type vxlan id 2000 local 192.0.2.17 \
+		dstport "$VXPORT" nolearning noudpcsum tos inherit ttl 100
+	ip link set dev vx200 up
+
+	ip link set dev vx200 master br2
+
+	ip link set dev $swp2 up
+	ip link add name $swp2.20 link $swp2 type vlan id 20
+	ip link set dev $swp2.20 master br2
+	ip link set dev $swp2.20 up
+
+	bridge fdb append dev vx100 00:00:00:00:00:00 dst 192.0.2.34 self
+	bridge fdb append dev vx200 00:00:00:00:00:00 dst 192.0.2.50 self
+}
+
+switch_destroy()
+{
+	bridge fdb del dev vx200 00:00:00:00:00:00 dst 192.0.2.50 self
+	bridge fdb del dev vx100 00:00:00:00:00:00 dst 192.0.2.34 self
+
+	ip link set dev vx200 nomaster
+	ip link set dev vx200 down
+	ip link del dev vx200
+
+	ip link del dev $swp2.20
+	ip link set dev $swp2 down
+	ip link set dev $swp2 nomaster
+
+	bridge vlan del vid 100 dev $swp1
+	ip link set dev $swp1 down
+	ip link set dev $swp1 nomaster
+
+	ip link set dev vx100 nomaster
+	ip link set dev vx100 down
+	ip link del dev vx100
+
+	rp1_unset_addr
+	ip link set dev $rp1 down
+
+	ip link set dev br2 down
+	ip link del dev br2
+
+	ip link set dev br1 down
+	ip link del dev br1
+}
+
+vrp2_create()
+{
+	simple_if_init $rp2 192.0.2.18/28
+	__simple_if_init v1 v$rp2 192.0.2.33/28
+	__simple_if_init v3 v$rp2 192.0.2.49/28
+	tc qdisc add dev v1 clsact
+}
+
+vrp2_destroy()
+{
+	tc qdisc del dev v1 clsact
+	__simple_if_fini v3 192.0.2.49/28
+	__simple_if_fini v1 192.0.2.33/28
+	simple_if_fini $rp2 192.0.2.18/28
+}
+
+ns_init_common()
+{
+	local in_if=$1; shift
+	local in_addr=$1; shift
+	local other_in_addr=$1; shift
+	local vxlan_name=$1; shift
+	local vxlan_id=$1; shift
+	local vlan_id=$1; shift
+	local host_addr=$1; shift
+	local nh_addr=$1; shift
+
+	ip link set dev $in_if up
+	ip address add dev $in_if $in_addr/28
+	tc qdisc add dev $in_if clsact
+
+	ip link add name br3 type bridge vlan_filtering 0
+	ip link set dev br3 up
+
+	ip link add name w1 type veth peer name w2
+
+	ip link set dev w1 master br3
+	ip link set dev w1 up
+
+	ip link add name $vxlan_name type vxlan id $vxlan_id local $in_addr \
+		dstport "$VXPORT"
+	ip link set dev $vxlan_name up
+	bridge fdb append dev $vxlan_name 00:00:00:00:00:00 dst 192.0.2.17 self
+	bridge fdb append dev $vxlan_name 00:00:00:00:00:00 dst $other_in_addr self
+
+	ip link set dev $vxlan_name master br3
+	tc qdisc add dev $vxlan_name clsact
+
+	simple_if_init w2
+	vlan_create w2 $vlan_id vw2 $host_addr/28
+
+	ip route add 192.0.2.16/28 nexthop via $nh_addr
+	ip route add $other_in_addr/32 nexthop via $nh_addr
+}
+export -f ns_init_common
+
+ns1_create()
+{
+	ip netns add ns1
+	ip link set dev v2 netns ns1
+	in_ns ns1 \
+	      ns_init_common v2 192.0.2.34 192.0.2.50 vx100 1000 10 192.0.2.3 \
+	      192.0.2.33
+
+	in_ns ns1 bridge vlan add vid 100 dev vx100 pvid untagged
+}
+
+ns1_destroy()
+{
+	ip netns exec ns1 ip link set dev v2 netns 1
+	ip netns del ns1
+}
+
+ns2_create()
+{
+	ip netns add ns2
+	ip link set dev v4 netns ns2
+	in_ns ns2 \
+	      ns_init_common v4 192.0.2.50 192.0.2.34 vx200 2000 20 192.0.2.4 \
+	      192.0.2.49
+
+	in_ns ns2 ip link add name w1.20 link w1 type vlan id 20
+	in_ns ns2 ip link set dev w1.20 master br3
+	in_ns ns2 ip link set dev w1.20 up
+}
+
+ns2_destroy()
+{
+	ip netns exec ns2 ip link set dev v4 netns 1
+	ip netns del ns2
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	rp1=${NETIFS[p5]}
+	rp2=${NETIFS[p6]}
+
+	vrf_prepare
+	forwarding_enable
+
+	h1_create
+	h2_create
+	switch_create
+
+	ip link add name v1 type veth peer name v2
+	ip link add name v3 type veth peer name v4
+	vrp2_create
+	ns1_create
+	ns2_create
+
+	r1_mac=$(in_ns ns1 mac_get w2)
+	r2_mac=$(in_ns ns2 mac_get w2)
+	h2_mac=$(mac_get $h2)
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ns2_destroy
+	ns1_destroy
+	vrp2_destroy
+	ip link del dev v3
+	ip link del dev v1
+
+	switch_destroy
+	h2_destroy
+	h1_destroy
+
+	forwarding_restore
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.3 ": local->remote 1 through VxLAN with an 802.1ad bridge"
+	ping_test $h2 192.0.2.4 ": local->remote 2 through VxLAN with an 802.1d bridge"
+}
+
+test_all()
+{
+	echo "Running tests with UDP port $VXPORT"
+	tests_run
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+test_all
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ethtool.sh b/tools/testing/selftests/net/forwarding/ethtool.sh
index dbb9fcf759e0..aa2eafb7b243 100755
--- a/tools/testing/selftests/net/forwarding/ethtool.sh
+++ b/tools/testing/selftests/net/forwarding/ethtool.sh
@@ -286,6 +286,8 @@ different_speeds_autoneg_on()
 	ethtool -s $h1 autoneg on
 }
 
+skip_on_veth
+
 trap cleanup EXIT
 
 setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh b/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh
index 4b42dfd4efd1..17f89c3b7c02 100755
--- a/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh
+++ b/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh
@@ -11,6 +11,8 @@ NUM_NETIFS=2
 source lib.sh
 source ethtool_lib.sh
 
+TIMEOUT=$((WAIT_TIMEOUT * 1000)) # ms
+
 setup_prepare()
 {
 	swp1=${NETIFS[p1]}
@@ -18,7 +20,7 @@ setup_prepare()
 	swp3=$NETIF_NO_CABLE
 }
 
-ethtool_extended_state_check()
+ethtool_ext_state()
 {
 	local dev=$1; shift
 	local expected_ext_state=$1; shift
@@ -30,21 +32,27 @@ ethtool_extended_state_check()
 		| sed -e 's/^[[:space:]]*//')
 	ext_state=$(echo $ext_state | cut -d "," -f1)
 
-	[[ $ext_state == $expected_ext_state ]]
-	check_err $? "Expected \"$expected_ext_state\", got \"$ext_state\""
-
-	[[ $ext_substate == $expected_ext_substate ]]
-	check_err $? "Expected \"$expected_ext_substate\", got \"$ext_substate\""
+	if [[ $ext_state != $expected_ext_state ]]; then
+		echo "Expected \"$expected_ext_state\", got \"$ext_state\""
+		return 1
+	fi
+	if [[ $ext_substate != $expected_ext_substate ]]; then
+		echo "Expected \"$expected_ext_substate\", got \"$ext_substate\""
+		return 1
+	fi
 }
 
 autoneg()
 {
+	local msg
+
 	RET=0
 
 	ip link set dev $swp1 up
 
-	sleep 4
-	ethtool_extended_state_check $swp1 "Autoneg" "No partner detected"
+	msg=$(busywait $TIMEOUT ethtool_ext_state $swp1 \
+			"Autoneg" "No partner detected")
+	check_err $? "$msg"
 
 	log_test "Autoneg, No partner detected"
 
@@ -53,6 +61,8 @@ autoneg()
 
 autoneg_force_mode()
 {
+	local msg
+
 	RET=0
 
 	ip link set dev $swp1 up
@@ -65,12 +75,13 @@ autoneg_force_mode()
 	ethtool_set $swp1 speed $speed1 autoneg off
 	ethtool_set $swp2 speed $speed2 autoneg off
 
-	sleep 4
-	ethtool_extended_state_check $swp1 "Autoneg" \
-		"No partner detected during force mode"
+	msg=$(busywait $TIMEOUT ethtool_ext_state $swp1 \
+			"Autoneg" "No partner detected during force mode")
+	check_err $? "$msg"
 
-	ethtool_extended_state_check $swp2 "Autoneg" \
-		"No partner detected during force mode"
+	msg=$(busywait $TIMEOUT ethtool_ext_state $swp2 \
+			"Autoneg" "No partner detected during force mode")
+	check_err $? "$msg"
 
 	log_test "Autoneg, No partner detected during force mode"
 
@@ -83,18 +94,22 @@ autoneg_force_mode()
 
 no_cable()
 {
+	local msg
+
 	RET=0
 
 	ip link set dev $swp3 up
 
-	sleep 1
-	ethtool_extended_state_check $swp3 "No cable"
+	msg=$(busywait $TIMEOUT ethtool_ext_state $swp3 "No cable")
+	check_err $? "$msg"
 
 	log_test "No cable"
 
 	ip link set dev $swp3 down
 }
 
+skip_on_veth
+
 setup_prepare
 
 tests_run
diff --git a/tools/testing/selftests/net/forwarding/ethtool_lib.sh b/tools/testing/selftests/net/forwarding/ethtool_lib.sh
index 9188e624dec0..b9bfb45085af 100644
--- a/tools/testing/selftests/net/forwarding/ethtool_lib.sh
+++ b/tools/testing/selftests/net/forwarding/ethtool_lib.sh
@@ -22,6 +22,40 @@ ethtool_set()
 	check_err $out "error in configuration. $cmd"
 }
 
+dev_linkmodes_params_get()
+{
+	local dev=$1; shift
+	local adver=$1; shift
+	local -a linkmodes_params
+	local param_count
+	local arr
+
+	if (($adver)); then
+		mode="Advertised link modes"
+	else
+		mode="Supported link modes"
+	fi
+
+	local -a dev_linkmodes=($(dev_speeds_get $dev 1 $adver))
+	for ((i=0; i<${#dev_linkmodes[@]}; i++)); do
+		linkmodes_params[$i]=$(echo -e "${dev_linkmodes[$i]}" | \
+			# Replaces all non numbers with spaces
+			sed -e 's/[^0-9]/ /g' | \
+			# Squeeze spaces in sequence to 1 space
+			tr -s ' ')
+		# Count how many numbers were found in the linkmode
+		param_count=$(echo "${linkmodes_params[$i]}" | wc -w)
+		if [[ $param_count -eq 1 ]]; then
+			linkmodes_params[$i]="${linkmodes_params[$i]} 1"
+		elif [[ $param_count -ge 3 ]]; then
+			arr=(${linkmodes_params[$i]})
+			# Take only first two params
+			linkmodes_params[$i]=$(echo "${arr[@]:0:2}")
+		fi
+	done
+	echo ${linkmodes_params[@]}
+}
+
 dev_speeds_get()
 {
 	local dev=$1; shift
diff --git a/tools/testing/selftests/net/forwarding/ethtool_mm.sh b/tools/testing/selftests/net/forwarding/ethtool_mm.sh
new file mode 100755
index 000000000000..50d5bfb17ef1
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ethtool_mm.sh
@@ -0,0 +1,340 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+	manual_with_verification_h1_to_h2
+	manual_with_verification_h2_to_h1
+	manual_without_verification_h1_to_h2
+	manual_without_verification_h2_to_h1
+	manual_failed_verification_h1_to_h2
+	manual_failed_verification_h2_to_h1
+	lldp
+"
+
+NUM_NETIFS=2
+REQUIRE_MZ=no
+PREEMPTIBLE_PRIO=0
+source lib.sh
+
+traffic_test()
+{
+	local if=$1; shift
+	local src=$1; shift
+	local num_pkts=10000
+	local before=
+	local after=
+	local delta=
+
+	if [ ${has_pmac_stats[$if]} = false ]; then
+		src="aggregate"
+	fi
+
+	before=$(ethtool_std_stats_get $if "eth-mac" "FramesTransmittedOK" $src)
+
+	$MZ $if -q -c $num_pkts -p 64 -b bcast -t ip -R $PREEMPTIBLE_PRIO
+
+	after=$(ethtool_std_stats_get $if "eth-mac" "FramesTransmittedOK" $src)
+
+	delta=$((after - before))
+
+	# Allow an extra 1% tolerance for random packets sent by the stack
+	[ $delta -ge $num_pkts ] && [ $delta -le $((num_pkts + 100)) ]
+}
+
+manual_with_verification()
+{
+	local tx=$1; shift
+	local rx=$1; shift
+
+	RET=0
+
+	# It isn't completely clear from IEEE 802.3-2018 Figure 99-5: Transmit
+	# Processing state diagram whether the "send_r" variable (send response
+	# to verification frame) should be taken into consideration while the
+	# MAC Merge TX direction is disabled. That being said, at least the
+	# NXP ENETC does not, and requires tx-enabled on in order to respond to
+	# the link partner's verification frames.
+	ethtool --set-mm $rx tx-enabled on
+	ethtool --set-mm $tx verify-enabled on tx-enabled on
+
+	# Wait for verification to finish
+	sleep 1
+
+	ethtool --json --show-mm $tx | jq -r '.[]."verify-status"' | \
+		grep -q 'SUCCEEDED'
+	check_err "$?" "Verification did not succeed"
+
+	ethtool --json --show-mm $tx | jq -r '.[]."tx-active"' | grep -q 'true'
+	check_err "$?" "pMAC TX is not active"
+
+	traffic_test $tx "pmac"
+	check_err "$?" "Traffic did not get sent through $tx's pMAC"
+
+	ethtool --set-mm $tx verify-enabled off tx-enabled off
+	ethtool --set-mm $rx tx-enabled off
+
+	log_test "Manual configuration with verification: $tx to $rx"
+}
+
+manual_with_verification_h1_to_h2()
+{
+	manual_with_verification $h1 $h2
+}
+
+manual_with_verification_h2_to_h1()
+{
+	manual_with_verification $h2 $h1
+}
+
+manual_without_verification()
+{
+	local tx=$1; shift
+	local rx=$1; shift
+
+	RET=0
+
+	ethtool --set-mm $tx verify-enabled off tx-enabled on
+
+	ethtool --json --show-mm $tx | jq -r '.[]."verify-status"' | \
+		grep -q 'DISABLED'
+	check_err "$?" "Verification is not disabled"
+
+	ethtool --json --show-mm $tx | jq -r '.[]."tx-active"' | grep -q 'true'
+	check_err "$?" "pMAC TX is not active"
+
+	traffic_test $tx "pmac"
+	check_err "$?" "Traffic did not get sent through $tx's pMAC"
+
+	ethtool --set-mm $tx verify-enabled off tx-enabled off
+
+	log_test "Manual configuration without verification: $tx to $rx"
+}
+
+manual_without_verification_h1_to_h2()
+{
+	manual_without_verification $h1 $h2
+}
+
+manual_without_verification_h2_to_h1()
+{
+	manual_without_verification $h2 $h1
+}
+
+manual_failed_verification()
+{
+	local tx=$1; shift
+	local rx=$1; shift
+
+	RET=0
+
+	ethtool --set-mm $rx pmac-enabled off
+	ethtool --set-mm $tx verify-enabled on tx-enabled on
+
+	# Wait for verification to time out
+	sleep 1
+
+	ethtool --json --show-mm $tx | jq -r '.[]."verify-status"' | \
+		grep -q 'SUCCEEDED'
+	check_fail "$?" "Verification succeeded when it shouldn't have"
+
+	ethtool --json --show-mm $tx | jq -r '.[]."tx-active"' | grep -q 'true'
+	check_fail "$?" "pMAC TX is active when it shouldn't have"
+
+	traffic_test $tx "emac"
+	check_err "$?" "Traffic did not get sent through $tx's eMAC"
+
+	ethtool --set-mm $tx verify-enabled off tx-enabled off
+	ethtool --set-mm $rx pmac-enabled on
+
+	log_test "Manual configuration with failed verification: $tx to $rx"
+}
+
+manual_failed_verification_h1_to_h2()
+{
+	manual_failed_verification $h1 $h2
+}
+
+manual_failed_verification_h2_to_h1()
+{
+	manual_failed_verification $h2 $h1
+}
+
+smallest_supported_add_frag_size()
+{
+	local iface=$1
+	local rx_min_frag_size=
+
+	rx_min_frag_size=$(ethtool --json --show-mm $iface | \
+		jq '.[]."rx-min-frag-size"')
+
+	if [ $rx_min_frag_size -le 60 ]; then
+		echo 0
+	elif [ $rx_min_frag_size -le 124 ]; then
+		echo 1
+	elif [ $rx_min_frag_size -le 188 ]; then
+		echo 2
+	elif [ $rx_min_frag_size -le 252 ]; then
+		echo 3
+	else
+		echo "$iface: RX min frag size $rx_min_frag_size cannot be advertised over LLDP"
+		exit 1
+	fi
+}
+
+expected_add_frag_size()
+{
+	local iface=$1
+	local requested=$2
+	local min=$(smallest_supported_add_frag_size $iface)
+
+	[ $requested -le $min ] && echo $min || echo $requested
+}
+
+lldp_change_add_frag_size()
+{
+	local add_frag_size=$1
+	local pattern=
+
+	lldptool -T -i $h1 -V addEthCaps addFragSize=$add_frag_size >/dev/null
+	# Wait for TLVs to be received
+	sleep 2
+	pattern=$(printf "Additional fragment size: %d" \
+			 $(expected_add_frag_size $h1 $add_frag_size))
+	lldptool -i $h2 -t -n -V addEthCaps | grep -q "$pattern"
+}
+
+lldp()
+{
+	RET=0
+
+	systemctl start lldpad
+
+	# Configure the interfaces to receive and transmit LLDPDUs
+	lldptool -L -i $h1 adminStatus=rxtx >/dev/null
+	lldptool -L -i $h2 adminStatus=rxtx >/dev/null
+
+	# Enable the transmission of Additional Ethernet Capabilities TLV
+	lldptool -T -i $h1 -V addEthCaps enableTx=yes >/dev/null
+	lldptool -T -i $h2 -V addEthCaps enableTx=yes >/dev/null
+
+	# Wait for TLVs to be received
+	sleep 2
+
+	lldptool -i $h1 -t -n -V addEthCaps | \
+		grep -q "Preemption capability active"
+	check_err "$?" "$h1 pMAC TX is not active"
+
+	lldptool -i $h2 -t -n -V addEthCaps | \
+		grep -q "Preemption capability active"
+	check_err "$?" "$h2 pMAC TX is not active"
+
+	lldp_change_add_frag_size 3
+	check_err "$?" "addFragSize 3"
+
+	lldp_change_add_frag_size 2
+	check_err "$?" "addFragSize 2"
+
+	lldp_change_add_frag_size 1
+	check_err "$?" "addFragSize 1"
+
+	lldp_change_add_frag_size 0
+	check_err "$?" "addFragSize 0"
+
+	traffic_test $h1 "pmac"
+	check_err "$?" "Traffic did not get sent through $h1's pMAC"
+
+	traffic_test $h2 "pmac"
+	check_err "$?" "Traffic did not get sent through $h2's pMAC"
+
+	systemctl stop lldpad
+
+	log_test "LLDP"
+}
+
+h1_create()
+{
+	ip link set dev $h1 up
+
+	tc qdisc add dev $h1 root mqprio num_tc 4 map 0 1 2 3 \
+		queues 1@0 1@1 1@2 1@3 \
+		fp P E E E \
+		hw 1
+
+	ethtool --set-mm $h1 pmac-enabled on tx-enabled off verify-enabled off
+}
+
+h2_create()
+{
+	ip link set dev $h2 up
+
+	ethtool --set-mm $h2 pmac-enabled on tx-enabled off verify-enabled off
+
+	tc qdisc add dev $h2 root mqprio num_tc 4 map 0 1 2 3 \
+		queues 1@0 1@1 1@2 1@3 \
+		fp P E E E \
+		hw 1
+}
+
+h1_destroy()
+{
+	ethtool --set-mm $h1 pmac-enabled off tx-enabled off verify-enabled off
+
+	tc qdisc del dev $h1 root
+
+	ip link set dev $h1 down
+}
+
+h2_destroy()
+{
+	tc qdisc del dev $h2 root
+
+	ethtool --set-mm $h2 pmac-enabled off tx-enabled off verify-enabled off
+
+	ip link set dev $h2 down
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	h2=${NETIFS[p2]}
+
+	h1_create
+	h2_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	h2_destroy
+	h1_destroy
+}
+
+check_ethtool_mm_support
+check_tc_fp_support
+require_command lldptool
+bail_on_lldpad "autoconfigure the MAC Merge layer" "configure it manually"
+
+for netif in ${NETIFS[@]}; do
+	ethtool --show-mm $netif 2>&1 &> /dev/null
+	if [[ $? -ne 0 ]]; then
+		echo "SKIP: $netif does not support MAC Merge"
+		exit $ksft_skip
+	fi
+
+	if check_ethtool_pmac_std_stats_support $netif eth-mac; then
+		has_pmac_stats[$netif]=true
+	else
+		has_pmac_stats[$netif]=false
+		echo "$netif does not report pMAC statistics, falling back to aggregate"
+	fi
+done
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ethtool_rmon.sh b/tools/testing/selftests/net/forwarding/ethtool_rmon.sh
new file mode 100755
index 000000000000..41a34a61f763
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ethtool_rmon.sh
@@ -0,0 +1,143 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+	rmon_rx_histogram
+	rmon_tx_histogram
+"
+
+NUM_NETIFS=2
+source lib.sh
+
+ETH_FCS_LEN=4
+ETH_HLEN=$((6+6+2))
+
+declare -A netif_mtu
+
+ensure_mtu()
+{
+	local iface=$1; shift
+	local len=$1; shift
+	local current=$(ip -j link show dev $iface | jq -r '.[0].mtu')
+	local required=$((len - ETH_HLEN - ETH_FCS_LEN))
+
+	if [ $current -lt $required ]; then
+		ip link set dev $iface mtu $required || return 1
+	fi
+}
+
+bucket_test()
+{
+	local iface=$1; shift
+	local neigh=$1; shift
+	local set=$1; shift
+	local bucket=$1; shift
+	local len=$1; shift
+	local num_rx=10000
+	local num_tx=20000
+	local expected=
+	local before=
+	local after=
+	local delta=
+
+	# Mausezahn does not include FCS bytes in its length - but the
+	# histogram counters do
+	len=$((len - ETH_FCS_LEN))
+
+	before=$(ethtool --json -S $iface --groups rmon | \
+		jq -r ".[0].rmon[\"${set}-pktsNtoM\"][$bucket].val")
+
+	# Send 10k one way and 20k in the other, to detect counters
+	# mapped to the wrong direction
+	$MZ $neigh -q -c $num_rx -p $len -a own -b bcast -d 10us
+	$MZ $iface -q -c $num_tx -p $len -a own -b bcast -d 10us
+
+	after=$(ethtool --json -S $iface --groups rmon | \
+		jq -r ".[0].rmon[\"${set}-pktsNtoM\"][$bucket].val")
+
+	delta=$((after - before))
+
+	expected=$([ $set = rx ] && echo $num_rx || echo $num_tx)
+
+	# Allow some extra tolerance for other packets sent by the stack
+	[ $delta -ge $expected ] && [ $delta -le $((expected + 100)) ]
+}
+
+rmon_histogram()
+{
+	local iface=$1; shift
+	local neigh=$1; shift
+	local set=$1; shift
+	local nbuckets=0
+	local step=
+
+	RET=0
+
+	while read -r -a bucket; do
+		step="$set-pkts${bucket[0]}to${bucket[1]} on $iface"
+
+		for if in $iface $neigh; do
+			if ! ensure_mtu $if ${bucket[0]}; then
+				log_test_skip "$if does not support the required MTU for $step"
+				return
+			fi
+		done
+
+		if ! bucket_test $iface $neigh $set $nbuckets ${bucket[0]}; then
+			check_err 1 "$step failed"
+			return 1
+		fi
+		log_test "$step"
+		nbuckets=$((nbuckets + 1))
+	done < <(ethtool --json -S $iface --groups rmon | \
+		jq -r ".[0].rmon[\"${set}-pktsNtoM\"][]|[.low, .high]|@tsv" 2>/dev/null)
+
+	if [ $nbuckets -eq 0 ]; then
+		log_test_skip "$iface does not support $set histogram counters"
+		return
+	fi
+}
+
+rmon_rx_histogram()
+{
+	rmon_histogram $h1 $h2 rx
+	rmon_histogram $h2 $h1 rx
+}
+
+rmon_tx_histogram()
+{
+	rmon_histogram $h1 $h2 tx
+	rmon_histogram $h2 $h1 tx
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	h2=${NETIFS[p2]}
+
+	for iface in $h1 $h2; do
+		netif_mtu[$iface]=$(ip -j link show dev $iface | jq -r '.[0].mtu')
+		ip link set dev $iface up
+	done
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	for iface in $h2 $h1; do
+		ip link set dev $iface \
+			mtu ${netif_mtu[$iface]} \
+			down
+	done
+}
+
+check_ethtool_counter_group_support
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/fib_offload_lib.sh b/tools/testing/selftests/net/forwarding/fib_offload_lib.sh
index 66496659bea7..1b3b46292179 100644
--- a/tools/testing/selftests/net/forwarding/fib_offload_lib.sh
+++ b/tools/testing/selftests/net/forwarding/fib_offload_lib.sh
@@ -99,15 +99,15 @@ fib_ipv4_tos_test()
 	fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 0 metric 1024" false
 	check_err $? "Route not in hardware when should"
 
-	ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 2 metric 1024
-	fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 2 metric 1024" false
+	ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 8 metric 1024
+	fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 8 metric 1024" false
 	check_err $? "Highest TOS route not in hardware when should"
 
 	fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 0 metric 1024" true
 	check_err $? "Lowest TOS route still in hardware when should not"
 
-	ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 1 metric 1024
-	fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 1 metric 1024" true
+	ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 4 metric 1024
+	fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 4 metric 1024" true
 	check_err $? "Middle TOS route in hardware when should not"
 
 	log_test "IPv4 routes with TOS"
@@ -224,7 +224,7 @@ fib_ipv4_plen_test()
 	ip -n $ns link set dev dummy1 up
 
 	# Add two routes with the same key and different prefix length and
-	# make sure both are in hardware. It can be verfied that both are
+	# make sure both are in hardware. It can be verified that both are
 	# sharing the same leaf by checking the /proc/net/fib_trie
 	ip -n $ns route add 192.0.2.0/24 dev dummy1
 	ip -n $ns route add 192.0.2.0/25 dev dummy1
@@ -277,11 +277,11 @@ fib_ipv4_replay_tos_test()
 	ip -n $ns link set dev dummy1 up
 
 	ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 0
-	ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 1
+	ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 4
 
 	devlink -N $ns dev reload $devlink_dev
 
-	fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 1" false
+	fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 4" false
 	check_err $? "Highest TOS route not in hardware when should"
 
 	fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 0" true
diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample
index b802c14d2950..1fc4f0242fc5 100644
--- a/tools/testing/selftests/net/forwarding/forwarding.config.sample
+++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample
@@ -13,6 +13,8 @@ NETIFS[p5]=veth4
 NETIFS[p6]=veth5
 NETIFS[p7]=veth6
 NETIFS[p8]=veth7
+NETIFS[p9]=veth8
+NETIFS[p10]=veth9
 
 # Port that does not have a cable connected.
 NETIF_NO_CABLE=eth8
@@ -26,6 +28,8 @@ PING=ping
 PING6=ping6
 # Packet generator. Some distributions use 'mz'.
 MZ=mausezahn
+# mausezahn delay between transmissions in microseconds.
+MZ_DELAY=0
 # Time to wait after interfaces participating in the test are all UP
 WAIT_TIME=5
 # Whether to pause on failure or not.
@@ -39,3 +43,11 @@ NETIF_CREATE=yes
 # Timeout (in seconds) before ping exits regardless of how many packets have
 # been sent or received
 PING_TIMEOUT=5
+# Minimum ageing_time (in centiseconds) supported by hardware
+LOW_AGEING_TIME=1000
+# Flag for tc match, supposed to be skip_sw/skip_hw which means do not process
+# filter by software/hardware
+TC_FLAG=skip_hw
+# IPv6 traceroute utility name.
+TROUTE6=traceroute6
+
diff --git a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh
new file mode 100755
index 000000000000..9788bd0f6e8b
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh
@@ -0,0 +1,464 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test traffic distribution when there are multiple paths between an IPv4 GRE
+# tunnel. The tunnel carries IPv4 and IPv6 traffic between multiple hosts.
+# Multiple routes are in the underlay network. With the default multipath
+# policy, SW2 will only look at the outer IP addresses, hence only a single
+# route would be used.
+#
+# +--------------------------------+
+# | H1                             |
+# |                     $h1 +      |
+# |   198.51.100.{2-253}/24 |      |
+# |   2001:db8:1::{2-fd}/64 |      |
+# +-------------------------|------+
+#                           |
+# +-------------------------|------------------+
+# | SW1                     |                  |
+# |                    $ol1 +                  |
+# |         198.51.100.1/24                    |
+# |        2001:db8:1::1/64                    |
+# |                                            |
+# |   + g1 (gre)                               |
+# |     loc=192.0.2.1                          |
+# |     rem=192.0.2.2 --.                      |
+# |     tos=inherit     |                      |
+# |                     v                      |
+# |                     + $ul1                 |
+# |                     | 192.0.2.17/28        |
+# +---------------------|----------------------+
+#                       |
+# +---------------------|----------------------+
+# | SW2                 |                      |
+# |               $ul21 +                      |
+# |       192.0.2.18/28 |                      |
+# |                     |                      |
+# !   __________________+___                   |
+# |  /                      \                  |
+# |  |                      |                  |
+# |  + $ul22.111 (vlan)     + $ul22.222 (vlan) |
+# |  | 192.0.2.33/28        | 192.0.2.49/28    |
+# |  |                      |                  |
+# +--|----------------------|------------------+
+#    |                      |
+# +--|----------------------|------------------+
+# |  |                      |                  |
+# |  + $ul32.111 (vlan)     + $ul32.222 (vlan) |
+# |  | 192.0.2.34/28        | 192.0.2.50/28    |
+# |  |                      |                  |
+# |  \__________________+___/                  |
+# |                     |                      |
+# |                     |                      |
+# |               $ul31 +                      |
+# |       192.0.2.65/28 |                  SW3 |
+# +---------------------|----------------------+
+#                       |
+# +---------------------|----------------------+
+# |                     + $ul4                 |
+# |                     ^ 192.0.2.66/28        |
+# |                     |                      |
+# |   + g2 (gre)        |                      |
+# |     loc=192.0.2.2   |                      |
+# |     rem=192.0.2.1 --'                      |
+# |     tos=inherit                            |
+# |                                            |
+# |                    $ol4 +                  |
+# |          203.0.113.1/24 |                  |
+# |        2001:db8:2::1/64 |              SW4 |
+# +-------------------------|------------------+
+#                           |
+# +-------------------------|------+
+# |                         |      |
+# |                     $h2 +      |
+# |    203.0.113.{2-253}/24        |
+# |   2001:db8:2::{2-fd}/64     H2 |
+# +--------------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	ping_ipv6
+	custom_hash
+"
+
+NUM_NETIFS=10
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 198.51.100.2/24 2001:db8:1::2/64
+	ip route add vrf v$h1 default via 198.51.100.1 dev $h1
+	ip -6 route add vrf v$h1 default via 2001:db8:1::1 dev $h1
+}
+
+h1_destroy()
+{
+	ip -6 route del vrf v$h1 default
+	ip route del vrf v$h1 default
+	simple_if_fini $h1 198.51.100.2/24 2001:db8:1::2/64
+}
+
+sw1_create()
+{
+	simple_if_init $ol1 198.51.100.1/24 2001:db8:1::1/64
+	__simple_if_init $ul1 v$ol1 192.0.2.17/28
+
+	tunnel_create g1 gre 192.0.2.1 192.0.2.2 tos inherit dev v$ol1
+	__simple_if_init g1 v$ol1 192.0.2.1/32
+	ip route add vrf v$ol1 192.0.2.2/32 via 192.0.2.18
+
+	ip route add vrf v$ol1 203.0.113.0/24 dev g1
+	ip -6 route add vrf v$ol1 2001:db8:2::/64 dev g1
+}
+
+sw1_destroy()
+{
+	ip -6 route del vrf v$ol1 2001:db8:2::/64
+	ip route del vrf v$ol1 203.0.113.0/24
+
+	ip route del vrf v$ol1 192.0.2.2/32
+	__simple_if_fini g1 192.0.2.1/32
+	tunnel_destroy g1
+
+	__simple_if_fini $ul1 192.0.2.17/28
+	simple_if_fini $ol1 198.51.100.1/24 2001:db8:1::1/64
+}
+
+sw2_create()
+{
+	simple_if_init $ul21 192.0.2.18/28
+	__simple_if_init $ul22 v$ul21
+	vlan_create $ul22 111 v$ul21 192.0.2.33/28
+	vlan_create $ul22 222 v$ul21 192.0.2.49/28
+
+	ip route add vrf v$ul21 192.0.2.1/32 via 192.0.2.17
+	ip route add vrf v$ul21 192.0.2.2/32 \
+	   nexthop via 192.0.2.34 \
+	   nexthop via 192.0.2.50
+}
+
+sw2_destroy()
+{
+	ip route del vrf v$ul21 192.0.2.2/32
+	ip route del vrf v$ul21 192.0.2.1/32
+
+	vlan_destroy $ul22 222
+	vlan_destroy $ul22 111
+	__simple_if_fini $ul22
+	simple_if_fini $ul21 192.0.2.18/28
+}
+
+sw3_create()
+{
+	simple_if_init $ul31 192.0.2.65/28
+	__simple_if_init $ul32 v$ul31
+	vlan_create $ul32 111 v$ul31 192.0.2.34/28
+	vlan_create $ul32 222 v$ul31 192.0.2.50/28
+
+	ip route add vrf v$ul31 192.0.2.2/32 via 192.0.2.66
+	ip route add vrf v$ul31 192.0.2.1/32 \
+	   nexthop via 192.0.2.33 \
+	   nexthop via 192.0.2.49
+
+	tc qdisc add dev $ul32 clsact
+	tc filter add dev $ul32 ingress pref 111 prot 802.1Q \
+	   flower vlan_id 111 action pass
+	tc filter add dev $ul32 ingress pref 222 prot 802.1Q \
+	   flower vlan_id 222 action pass
+}
+
+sw3_destroy()
+{
+	tc qdisc del dev $ul32 clsact
+
+	ip route del vrf v$ul31 192.0.2.1/32
+	ip route del vrf v$ul31 192.0.2.2/32
+
+	vlan_destroy $ul32 222
+	vlan_destroy $ul32 111
+	__simple_if_fini $ul32
+	simple_if_fini $ul31 192.0.2.65/28
+}
+
+sw4_create()
+{
+	simple_if_init $ol4 203.0.113.1/24 2001:db8:2::1/64
+	__simple_if_init $ul4 v$ol4 192.0.2.66/28
+
+	tunnel_create g2 gre 192.0.2.2 192.0.2.1 tos inherit dev v$ol4
+	__simple_if_init g2 v$ol4 192.0.2.2/32
+	ip route add vrf v$ol4 192.0.2.1/32 via 192.0.2.65
+
+	ip route add vrf v$ol4 198.51.100.0/24 dev g2
+	ip -6 route add vrf v$ol4 2001:db8:1::/64 dev g2
+}
+
+sw4_destroy()
+{
+	ip -6 route del vrf v$ol4 2001:db8:1::/64
+	ip route del vrf v$ol4 198.51.100.0/24
+
+	ip route del vrf v$ol4 192.0.2.1/32
+	__simple_if_fini g2 192.0.2.2/32
+	tunnel_destroy g2
+
+	__simple_if_fini $ul4 192.0.2.66/28
+	simple_if_fini $ol4 203.0.113.1/24 2001:db8:2::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 203.0.113.2/24 2001:db8:2::2/64
+	ip route add vrf v$h2 default via 203.0.113.1 dev $h2
+	ip -6 route add vrf v$h2 default via 2001:db8:2::1 dev $h2
+}
+
+h2_destroy()
+{
+	ip -6 route del vrf v$h2 default
+	ip route del vrf v$h2 default
+	simple_if_fini $h2 203.0.113.2/24 2001:db8:2::2/64
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+
+	ol1=${NETIFS[p2]}
+	ul1=${NETIFS[p3]}
+
+	ul21=${NETIFS[p4]}
+	ul22=${NETIFS[p5]}
+
+	ul32=${NETIFS[p6]}
+	ul31=${NETIFS[p7]}
+
+	ul4=${NETIFS[p8]}
+	ol4=${NETIFS[p9]}
+
+	h2=${NETIFS[p10]}
+
+	vrf_prepare
+	h1_create
+	sw1_create
+	sw2_create
+	sw3_create
+	sw4_create
+	h2_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	h2_destroy
+	sw4_destroy
+	sw3_destroy
+	sw2_destroy
+	sw1_destroy
+	h1_destroy
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 203.0.113.2
+}
+
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:2::2
+}
+
+send_src_ipv4()
+{
+	ip vrf exec v$h1 $MZ $h1 -q -p 64 \
+		-A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \
+		-d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_dst_ipv4()
+{
+	ip vrf exec v$h1 $MZ $h1 -q -p 64 \
+		-A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \
+		-d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_src_udp4()
+{
+	ip vrf exec v$h1 $MZ $h1 -q -p 64 \
+		-A 198.51.100.2 -B 203.0.113.2 \
+		-d $MZ_DELAY -t udp "sp=0-32768,dp=30000"
+}
+
+send_dst_udp4()
+{
+	ip vrf exec v$h1 $MZ $h1 -q -p 64 \
+		-A 198.51.100.2 -B 203.0.113.2 \
+		-d $MZ_DELAY -t udp "sp=20000,dp=0-32768"
+}
+
+send_src_ipv6()
+{
+	ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
+		-A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:2::2 \
+		-d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_dst_ipv6()
+{
+	ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
+		-A 2001:db8:1::2 -B "2001:db8:2::2-2001:db8:2::fd" \
+		-d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_flowlabel()
+{
+	# Generate 16384 echo requests, each with a random flow label.
+	for _ in $(seq 1 16384); do
+		ip vrf exec v$h1 \
+			$PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1
+	done
+}
+
+send_src_udp6()
+{
+	ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
+		-A 2001:db8:1::2 -B 2001:db8:2::2 \
+		-d $MZ_DELAY -t udp "sp=0-32768,dp=30000"
+}
+
+send_dst_udp6()
+{
+	ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
+		-A 2001:db8:1::2 -B 2001:db8:2::2 \
+		-d $MZ_DELAY -t udp "sp=20000,dp=0-32768"
+}
+
+custom_hash_test()
+{
+	local field="$1"; shift
+	local balanced="$1"; shift
+	local send_flows="$@"
+
+	RET=0
+
+	local t0_111=$(tc_rule_stats_get $ul32 111 ingress)
+	local t0_222=$(tc_rule_stats_get $ul32 222 ingress)
+
+	$send_flows
+
+	local t1_111=$(tc_rule_stats_get $ul32 111 ingress)
+	local t1_222=$(tc_rule_stats_get $ul32 222 ingress)
+
+	local d111=$((t1_111 - t0_111))
+	local d222=$((t1_222 - t0_222))
+
+	local diff=$((d222 - d111))
+	local sum=$((d111 + d222))
+
+	local pct=$(echo "$diff / $sum * 100" | bc -l)
+	local is_balanced=$(echo "-20 <= $pct && $pct <= 20" | bc)
+
+	[[ ( $is_balanced -eq 1 && $balanced == "balanced" ) ||
+	   ( $is_balanced -eq 0 && $balanced == "unbalanced" ) ]]
+	check_err $? "Expected traffic to be $balanced, but it is not"
+
+	log_test "Multipath hash field: $field ($balanced)"
+	log_info "Packets sent on path1 / path2: $d111 / $d222"
+}
+
+custom_hash_v4()
+{
+	log_info "Running IPv4 overlay custom multipath hash tests"
+
+	# Prevent the neighbour table from overflowing, as different neighbour
+	# entries will be created on $ol4 when using different destination IPs.
+	sysctl_set net.ipv4.neigh.default.gc_thresh1 1024
+	sysctl_set net.ipv4.neigh.default.gc_thresh2 1024
+	sysctl_set net.ipv4.neigh.default.gc_thresh3 1024
+
+	sysctl_set net.ipv4.fib_multipath_hash_fields 0x0040
+	custom_hash_test "Inner source IP" "balanced" send_src_ipv4
+	custom_hash_test "Inner source IP" "unbalanced" send_dst_ipv4
+
+	sysctl_set net.ipv4.fib_multipath_hash_fields 0x0080
+	custom_hash_test "Inner destination IP" "balanced" send_dst_ipv4
+	custom_hash_test "Inner destination IP" "unbalanced" send_src_ipv4
+
+	sysctl_set net.ipv4.fib_multipath_hash_fields 0x0400
+	custom_hash_test "Inner source port" "balanced" send_src_udp4
+	custom_hash_test "Inner source port" "unbalanced" send_dst_udp4
+
+	sysctl_set net.ipv4.fib_multipath_hash_fields 0x0800
+	custom_hash_test "Inner destination port" "balanced" send_dst_udp4
+	custom_hash_test "Inner destination port" "unbalanced" send_src_udp4
+
+	sysctl_restore net.ipv4.neigh.default.gc_thresh3
+	sysctl_restore net.ipv4.neigh.default.gc_thresh2
+	sysctl_restore net.ipv4.neigh.default.gc_thresh1
+}
+
+custom_hash_v6()
+{
+	log_info "Running IPv6 overlay custom multipath hash tests"
+
+	# Prevent the neighbour table from overflowing, as different neighbour
+	# entries will be created on $ol4 when using different destination IPs.
+	sysctl_set net.ipv6.neigh.default.gc_thresh1 1024
+	sysctl_set net.ipv6.neigh.default.gc_thresh2 1024
+	sysctl_set net.ipv6.neigh.default.gc_thresh3 1024
+
+	sysctl_set net.ipv4.fib_multipath_hash_fields 0x0040
+	custom_hash_test "Inner source IP" "balanced" send_src_ipv6
+	custom_hash_test "Inner source IP" "unbalanced" send_dst_ipv6
+
+	sysctl_set net.ipv4.fib_multipath_hash_fields 0x0080
+	custom_hash_test "Inner destination IP" "balanced" send_dst_ipv6
+	custom_hash_test "Inner destination IP" "unbalanced" send_src_ipv6
+
+	sysctl_set net.ipv4.fib_multipath_hash_fields 0x0200
+	custom_hash_test "Inner flowlabel" "balanced" send_flowlabel
+	custom_hash_test "Inner flowlabel" "unbalanced" send_src_ipv6
+
+	sysctl_set net.ipv4.fib_multipath_hash_fields 0x0400
+	custom_hash_test "Inner source port" "balanced" send_src_udp6
+	custom_hash_test "Inner source port" "unbalanced" send_dst_udp6
+
+	sysctl_set net.ipv4.fib_multipath_hash_fields 0x0800
+	custom_hash_test "Inner destination port" "balanced" send_dst_udp6
+	custom_hash_test "Inner destination port" "unbalanced" send_src_udp6
+
+	sysctl_restore net.ipv6.neigh.default.gc_thresh3
+	sysctl_restore net.ipv6.neigh.default.gc_thresh2
+	sysctl_restore net.ipv6.neigh.default.gc_thresh1
+}
+
+custom_hash()
+{
+	# Test that when the hash policy is set to custom, traffic is
+	# distributed only according to the fields set in the
+	# fib_multipath_hash_fields sysctl.
+	#
+	# Each time set a different field and make sure traffic is only
+	# distributed when the field is changed in the packet stream.
+
+	sysctl_set net.ipv4.fib_multipath_hash_policy 3
+
+	custom_hash_v4
+	custom_hash_v6
+
+	sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/gre_inner_v4_multipath.sh b/tools/testing/selftests/net/forwarding/gre_inner_v4_multipath.sh
index e4009f658003..efca6114a3ce 100755
--- a/tools/testing/selftests/net/forwarding/gre_inner_v4_multipath.sh
+++ b/tools/testing/selftests/net/forwarding/gre_inner_v4_multipath.sh
@@ -267,7 +267,7 @@ multipath4_test()
 
 	ip vrf exec v$h1 \
 	   $MZ $h1 -q -p 64 -A "192.0.3.2-192.0.3.62" -B "192.0.4.2-192.0.4.62" \
-	       -d 1msec -c 50 -t udp "sp=1024,dp=1024"
+	       -d $MZ_DELAY -c 50 -t udp "sp=1024,dp=1024"
 	sleep 1
 
 	local t1_111=$(tc_rule_stats_get $ul32 111 ingress)
diff --git a/tools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh b/tools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh
index e449475c4d3e..a71ad39fc0c3 100755
--- a/tools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh
+++ b/tools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh
@@ -266,9 +266,9 @@ multipath6_test()
 	local t0_222=$(tc_rule_stats_get $ul32 222 ingress)
 
 	ip vrf exec v$h1 \
-	   $MZ $h1 -6 -q -p 64 -A "2001:db8:1::2-2001:db8:1::1e" \
-	       -B "2001:db8:2::2-2001:db8:2::1e" \
-	       -d 1msec -c 50 -t udp "sp=1024,dp=1024"
+	   $MZ $h1 -6 -q -p 64 -A "2001:db8:1::2-2001:db8:1::3e" \
+	       -B "2001:db8:2::2-2001:db8:2::3e" \
+	       -d $MZ_DELAY -c 50 -t udp "sp=1024,dp=1024"
 	sleep 1
 
 	local t1_111=$(tc_rule_stats_get $ul32 111 ingress)
diff --git a/tools/testing/selftests/net/forwarding/gre_multipath.sh b/tools/testing/selftests/net/forwarding/gre_multipath.sh
index a8d8e8b3dc81..57531c1d884d 100755
--- a/tools/testing/selftests/net/forwarding/gre_multipath.sh
+++ b/tools/testing/selftests/net/forwarding/gre_multipath.sh
@@ -220,7 +220,7 @@ multipath4_test()
 
 	ip vrf exec v$h1 \
 	   $MZ $h1 -q -p 64 -A 192.0.2.1 -B 192.0.2.18 \
-	       -d 1msec -t udp "sp=1024,dp=0-32768"
+	       -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
 
 	local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
 	local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
diff --git a/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh b/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh
new file mode 100755
index 000000000000..7d5b2b9cc133
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh
@@ -0,0 +1,319 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test traffic distribution when a wECMP route forwards traffic to two GRE
+# tunnels.
+#
+# +-------------------------+
+# | H1                      |
+# |               $h1 +     |
+# |      192.0.2.1/28 |     |
+# |  2001:db8:1::1/64 |     |
+# +-------------------|-----+
+#                     |
+# +-------------------|------------------------+
+# | SW1               |                        |
+# |              $ol1 +                        |
+# |      192.0.2.2/28                          |
+# |  2001:db8:1::2/64                          |
+# |                                            |
+# |  + g1a (gre)          + g1b (gre)          |
+# |    loc=192.0.2.65       loc=192.0.2.81     |
+# |    rem=192.0.2.66 --.   rem=192.0.2.82 --. |
+# |    tos=inherit      |   tos=inherit      | |
+# |  .------------------'                    | |
+# |  |                    .------------------' |
+# |  v                    v                    |
+# |  + $ul1.111 (vlan)    + $ul1.222 (vlan)    |
+# |  | 192.0.2.129/28     | 192.0.2.145/28     |
+# |   \                  /                     |
+# |    \________________/                      |
+# |            |                               |
+# |            + $ul1                          |
+# +------------|-------------------------------+
+#              |
+# +------------|-------------------------------+
+# | SW2        + $ul2                          |
+# |     _______|________                       |
+# |    /                \                      |
+# |   /                  \                     |
+# |  + $ul2.111 (vlan)    + $ul2.222 (vlan)    |
+# |  ^ 192.0.2.130/28     ^ 192.0.2.146/28     |
+# |  |                    |                    |
+# |  |                    '------------------. |
+# |  '------------------.                    | |
+# |  + g2a (gre)        | + g2b (gre)        | |
+# |    loc=192.0.2.66   |   loc=192.0.2.82   | |
+# |    rem=192.0.2.65 --'   rem=192.0.2.81 --' |
+# |    tos=inherit          tos=inherit        |
+# |                                            |
+# |              $ol2 +                        |
+# |     192.0.2.17/28 |                        |
+# |  2001:db8:2::1/64 |                        |
+# +-------------------|------------------------+
+#                     |
+# +-------------------|-----+
+# | H2                |     |
+# |               $h2 +     |
+# |     192.0.2.18/28       |
+# |  2001:db8:2::2/64       |
+# +-------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	ping_ipv6
+	multipath_ipv4
+	multipath_ipv6
+"
+
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+	ip route add vrf v$h1 192.0.2.16/28 via 192.0.2.2
+	ip route add vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+	ip route del vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2
+	ip route del vrf v$h1 192.0.2.16/28 via 192.0.2.2
+	simple_if_fini $h1 192.0.2.1/28
+}
+
+sw1_create()
+{
+	simple_if_init $ol1 192.0.2.2/28 2001:db8:1::2/64
+	__simple_if_init $ul1 v$ol1
+	vlan_create $ul1 111 v$ol1 192.0.2.129/28
+	vlan_create $ul1 222 v$ol1 192.0.2.145/28
+
+	tunnel_create g1a gre 192.0.2.65 192.0.2.66 tos inherit dev v$ol1
+	__simple_if_init g1a v$ol1 192.0.2.65/32
+	ip route add vrf v$ol1 192.0.2.66/32 via 192.0.2.130
+
+	tunnel_create g1b gre 192.0.2.81 192.0.2.82 tos inherit dev v$ol1
+	__simple_if_init g1b v$ol1 192.0.2.81/32
+	ip route add vrf v$ol1 192.0.2.82/32 via 192.0.2.146
+
+	ip -6 nexthop add id 101 dev g1a
+	ip -6 nexthop add id 102 dev g1b
+	ip nexthop add id 103 group 101/102
+
+	ip route add vrf v$ol1 192.0.2.16/28 nhid 103
+	ip route add vrf v$ol1 2001:db8:2::/64 nhid 103
+}
+
+sw1_destroy()
+{
+	ip route del vrf v$ol1 2001:db8:2::/64
+	ip route del vrf v$ol1 192.0.2.16/28
+
+	ip nexthop del id 103
+	ip -6 nexthop del id 102
+	ip -6 nexthop del id 101
+
+	ip route del vrf v$ol1 192.0.2.82/32 via 192.0.2.146
+	__simple_if_fini g1b 192.0.2.81/32
+	tunnel_destroy g1b
+
+	ip route del vrf v$ol1 192.0.2.66/32 via 192.0.2.130
+	__simple_if_fini g1a 192.0.2.65/32
+	tunnel_destroy g1a
+
+	vlan_destroy $ul1 222
+	vlan_destroy $ul1 111
+	__simple_if_fini $ul1
+	simple_if_fini $ol1 192.0.2.2/28 2001:db8:1::2/64
+}
+
+sw2_create()
+{
+	simple_if_init $ol2 192.0.2.17/28 2001:db8:2::1/64
+	__simple_if_init $ul2 v$ol2
+	vlan_create $ul2 111 v$ol2 192.0.2.130/28
+	vlan_create $ul2 222 v$ol2 192.0.2.146/28
+
+	tunnel_create g2a gre 192.0.2.66 192.0.2.65 tos inherit dev v$ol2
+	__simple_if_init g2a v$ol2 192.0.2.66/32
+	ip route add vrf v$ol2 192.0.2.65/32 via 192.0.2.129
+
+	tunnel_create g2b gre 192.0.2.82 192.0.2.81 tos inherit dev v$ol2
+	__simple_if_init g2b v$ol2 192.0.2.82/32
+	ip route add vrf v$ol2 192.0.2.81/32 via 192.0.2.145
+
+	ip -6 nexthop add id 201 dev g2a
+	ip -6 nexthop add id 202 dev g2b
+	ip nexthop add id 203 group 201/202
+
+	ip route add vrf v$ol2 192.0.2.0/28 nhid 203
+	ip route add vrf v$ol2 2001:db8:1::/64 nhid 203
+
+	tc qdisc add dev $ul2 clsact
+	tc filter add dev $ul2 ingress pref 111 prot 802.1Q \
+	   flower vlan_id 111 action pass
+	tc filter add dev $ul2 ingress pref 222 prot 802.1Q \
+	   flower vlan_id 222 action pass
+}
+
+sw2_destroy()
+{
+	tc qdisc del dev $ul2 clsact
+
+	ip route del vrf v$ol2 2001:db8:1::/64
+	ip route del vrf v$ol2 192.0.2.0/28
+
+	ip nexthop del id 203
+	ip -6 nexthop del id 202
+	ip -6 nexthop del id 201
+
+	ip route del vrf v$ol2 192.0.2.81/32 via 192.0.2.145
+	__simple_if_fini g2b 192.0.2.82/32
+	tunnel_destroy g2b
+
+	ip route del vrf v$ol2 192.0.2.65/32 via 192.0.2.129
+	__simple_if_fini g2a 192.0.2.66/32
+	tunnel_destroy g2a
+
+	vlan_destroy $ul2 222
+	vlan_destroy $ul2 111
+	__simple_if_fini $ul2
+	simple_if_fini $ol2 192.0.2.17/28 2001:db8:2::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.18/28 2001:db8:2::2/64
+	ip route add vrf v$h2 192.0.2.0/28 via 192.0.2.17
+	ip route add vrf v$h2 2001:db8:1::/64 via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+	ip route del vrf v$h2 2001:db8:1::/64 via 2001:db8:2::1
+	ip route del vrf v$h2 192.0.2.0/28 via 192.0.2.17
+	simple_if_fini $h2 192.0.2.18/28 2001:db8:2::2/64
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	ol1=${NETIFS[p2]}
+
+	ul1=${NETIFS[p3]}
+	ul2=${NETIFS[p4]}
+
+	ol2=${NETIFS[p5]}
+	h2=${NETIFS[p6]}
+
+	vrf_prepare
+	h1_create
+	sw1_create
+	sw2_create
+	h2_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	h2_destroy
+	sw2_destroy
+	sw1_destroy
+	h1_destroy
+	vrf_cleanup
+}
+
+multipath4_test()
+{
+	local what=$1; shift
+	local weight1=$1; shift
+	local weight2=$1; shift
+
+	sysctl_set net.ipv4.fib_multipath_hash_policy 1
+	ip nexthop replace id 103 group 101,$weight1/102,$weight2
+
+	local t0_111=$(tc_rule_stats_get $ul2 111 ingress)
+	local t0_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+	ip vrf exec v$h1 \
+	   $MZ $h1 -q -p 64 -A 192.0.2.1 -B 192.0.2.18 \
+	       -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
+
+	local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
+	local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+	local d111=$((t1_111 - t0_111))
+	local d222=$((t1_222 - t0_222))
+	multipath_eval "$what" $weight1 $weight2 $d111 $d222
+
+	ip nexthop replace id 103 group 101/102
+	sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+multipath6_test()
+{
+	local what=$1; shift
+	local weight1=$1; shift
+	local weight2=$1; shift
+
+	sysctl_set net.ipv6.fib_multipath_hash_policy 1
+	ip nexthop replace id 103 group 101,$weight1/102,$weight2
+
+	local t0_111=$(tc_rule_stats_get $ul2 111 ingress)
+	local t0_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+	ip vrf exec v$h1 \
+		$MZ $h1 -6 -q -p 64 -A 2001:db8:1::1 -B 2001:db8:2::2 \
+		-d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
+
+	local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
+	local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+	local d111=$((t1_111 - t0_111))
+	local d222=$((t1_222 - t0_222))
+	multipath_eval "$what" $weight1 $weight2 $d111 $d222
+
+	ip nexthop replace id 103 group 101/102
+	sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.18
+}
+
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:2::2
+}
+
+multipath_ipv4()
+{
+	log_info "Running IPv4 multipath tests"
+	multipath4_test "ECMP" 1 1
+	multipath4_test "Weighted MP 2:1" 2 1
+	multipath4_test "Weighted MP 11:45" 11 45
+}
+
+multipath_ipv6()
+{
+	log_info "Running IPv6 multipath tests"
+	multipath6_test "ECMP" 1 1
+	multipath6_test "Weighted MP 2:1" 2 1
+	multipath6_test "Weighted MP 11:45" 11 45
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh b/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh
new file mode 100755
index 000000000000..370f9925302d
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh
@@ -0,0 +1,323 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test traffic distribution when a wECMP route forwards traffic to two GRE
+# tunnels.
+#
+# +-------------------------+
+# | H1                      |
+# |               $h1 +     |
+# |      192.0.2.1/28 |     |
+# |  2001:db8:1::1/64 |     |
+# +-------------------|-----+
+#                     |
+# +-------------------|------------------------+
+# | SW1               |                        |
+# |              $ol1 +                        |
+# |      192.0.2.2/28                          |
+# |  2001:db8:1::2/64                          |
+# |                                            |
+# |  + g1a (gre)          + g1b (gre)          |
+# |    loc=192.0.2.65       loc=192.0.2.81     |
+# |    rem=192.0.2.66 --.   rem=192.0.2.82 --. |
+# |    tos=inherit      |   tos=inherit      | |
+# |  .------------------'                    | |
+# |  |                    .------------------' |
+# |  v                    v                    |
+# |  + $ul1.111 (vlan)    + $ul1.222 (vlan)    |
+# |  | 192.0.2.129/28     | 192.0.2.145/28     |
+# |   \                  /                     |
+# |    \________________/                      |
+# |            |                               |
+# |            + $ul1                          |
+# +------------|-------------------------------+
+#              |
+# +------------|-------------------------------+
+# | SW2        + $ul2                          |
+# |     _______|________                       |
+# |    /                \                      |
+# |   /                  \                     |
+# |  + $ul2.111 (vlan)    + $ul2.222 (vlan)    |
+# |  ^ 192.0.2.130/28     ^ 192.0.2.146/28     |
+# |  |                    |                    |
+# |  |                    '------------------. |
+# |  '------------------.                    | |
+# |  + g2a (gre)        | + g2b (gre)        | |
+# |    loc=192.0.2.66   |   loc=192.0.2.82   | |
+# |    rem=192.0.2.65 --'   rem=192.0.2.81 --' |
+# |    tos=inherit          tos=inherit        |
+# |                                            |
+# |              $ol2 +                        |
+# |     192.0.2.17/28 |                        |
+# |  2001:db8:2::1/64 |                        |
+# +-------------------|------------------------+
+#                     |
+# +-------------------|-----+
+# | H2                |     |
+# |               $h2 +     |
+# |     192.0.2.18/28       |
+# |  2001:db8:2::2/64       |
+# +-------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	ping_ipv6
+	multipath_ipv4
+	multipath_ipv6
+"
+
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+	ip route add vrf v$h1 192.0.2.16/28 via 192.0.2.2
+	ip route add vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+	ip route del vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2
+	ip route del vrf v$h1 192.0.2.16/28 via 192.0.2.2
+	simple_if_fini $h1 192.0.2.1/28
+}
+
+sw1_create()
+{
+	simple_if_init $ol1 192.0.2.2/28 2001:db8:1::2/64
+	__simple_if_init $ul1 v$ol1
+	vlan_create $ul1 111 v$ol1 192.0.2.129/28
+	vlan_create $ul1 222 v$ol1 192.0.2.145/28
+
+	tunnel_create g1a gre 192.0.2.65 192.0.2.66 tos inherit dev v$ol1
+	__simple_if_init g1a v$ol1 192.0.2.65/32
+	ip route add vrf v$ol1 192.0.2.66/32 via 192.0.2.130
+
+	tunnel_create g1b gre 192.0.2.81 192.0.2.82 tos inherit dev v$ol1
+	__simple_if_init g1b v$ol1 192.0.2.81/32
+	ip route add vrf v$ol1 192.0.2.82/32 via 192.0.2.146
+
+	ip -6 nexthop add id 101 dev g1a
+	ip -6 nexthop add id 102 dev g1b
+	ip nexthop add id 103 group 101/102 type resilient buckets 512 \
+		idle_timer 0
+
+	ip route add vrf v$ol1 192.0.2.16/28 nhid 103
+	ip route add vrf v$ol1 2001:db8:2::/64 nhid 103
+}
+
+sw1_destroy()
+{
+	ip route del vrf v$ol1 2001:db8:2::/64
+	ip route del vrf v$ol1 192.0.2.16/28
+
+	ip nexthop del id 103
+	ip -6 nexthop del id 102
+	ip -6 nexthop del id 101
+
+	ip route del vrf v$ol1 192.0.2.82/32 via 192.0.2.146
+	__simple_if_fini g1b 192.0.2.81/32
+	tunnel_destroy g1b
+
+	ip route del vrf v$ol1 192.0.2.66/32 via 192.0.2.130
+	__simple_if_fini g1a 192.0.2.65/32
+	tunnel_destroy g1a
+
+	vlan_destroy $ul1 222
+	vlan_destroy $ul1 111
+	__simple_if_fini $ul1
+	simple_if_fini $ol1 192.0.2.2/28 2001:db8:1::2/64
+}
+
+sw2_create()
+{
+	simple_if_init $ol2 192.0.2.17/28 2001:db8:2::1/64
+	__simple_if_init $ul2 v$ol2
+	vlan_create $ul2 111 v$ol2 192.0.2.130/28
+	vlan_create $ul2 222 v$ol2 192.0.2.146/28
+
+	tunnel_create g2a gre 192.0.2.66 192.0.2.65 tos inherit dev v$ol2
+	__simple_if_init g2a v$ol2 192.0.2.66/32
+	ip route add vrf v$ol2 192.0.2.65/32 via 192.0.2.129
+
+	tunnel_create g2b gre 192.0.2.82 192.0.2.81 tos inherit dev v$ol2
+	__simple_if_init g2b v$ol2 192.0.2.82/32
+	ip route add vrf v$ol2 192.0.2.81/32 via 192.0.2.145
+
+	ip -6 nexthop add id 201 dev g2a
+	ip -6 nexthop add id 202 dev g2b
+	ip nexthop add id 203 group 201/202 type resilient buckets 512 \
+		idle_timer 0
+
+	ip route add vrf v$ol2 192.0.2.0/28 nhid 203
+	ip route add vrf v$ol2 2001:db8:1::/64 nhid 203
+
+	tc qdisc add dev $ul2 clsact
+	tc filter add dev $ul2 ingress pref 111 prot 802.1Q \
+	   flower vlan_id 111 action pass
+	tc filter add dev $ul2 ingress pref 222 prot 802.1Q \
+	   flower vlan_id 222 action pass
+}
+
+sw2_destroy()
+{
+	tc qdisc del dev $ul2 clsact
+
+	ip route del vrf v$ol2 2001:db8:1::/64
+	ip route del vrf v$ol2 192.0.2.0/28
+
+	ip nexthop del id 203
+	ip -6 nexthop del id 202
+	ip -6 nexthop del id 201
+
+	ip route del vrf v$ol2 192.0.2.81/32 via 192.0.2.145
+	__simple_if_fini g2b 192.0.2.82/32
+	tunnel_destroy g2b
+
+	ip route del vrf v$ol2 192.0.2.65/32 via 192.0.2.129
+	__simple_if_fini g2a 192.0.2.66/32
+	tunnel_destroy g2a
+
+	vlan_destroy $ul2 222
+	vlan_destroy $ul2 111
+	__simple_if_fini $ul2
+	simple_if_fini $ol2 192.0.2.17/28 2001:db8:2::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.18/28 2001:db8:2::2/64
+	ip route add vrf v$h2 192.0.2.0/28 via 192.0.2.17
+	ip route add vrf v$h2 2001:db8:1::/64 via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+	ip route del vrf v$h2 2001:db8:1::/64 via 2001:db8:2::1
+	ip route del vrf v$h2 192.0.2.0/28 via 192.0.2.17
+	simple_if_fini $h2 192.0.2.18/28 2001:db8:2::2/64
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	ol1=${NETIFS[p2]}
+
+	ul1=${NETIFS[p3]}
+	ul2=${NETIFS[p4]}
+
+	ol2=${NETIFS[p5]}
+	h2=${NETIFS[p6]}
+
+	vrf_prepare
+	h1_create
+	sw1_create
+	sw2_create
+	h2_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	h2_destroy
+	sw2_destroy
+	sw1_destroy
+	h1_destroy
+	vrf_cleanup
+}
+
+multipath4_test()
+{
+	local what=$1; shift
+	local weight1=$1; shift
+	local weight2=$1; shift
+
+	sysctl_set net.ipv4.fib_multipath_hash_policy 1
+	ip nexthop replace id 103 group 101,$weight1/102,$weight2 \
+		type resilient
+
+	local t0_111=$(tc_rule_stats_get $ul2 111 ingress)
+	local t0_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+	ip vrf exec v$h1 \
+	   $MZ $h1 -q -p 64 -A 192.0.2.1 -B 192.0.2.18 \
+	       -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
+
+	local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
+	local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+	local d111=$((t1_111 - t0_111))
+	local d222=$((t1_222 - t0_222))
+	multipath_eval "$what" $weight1 $weight2 $d111 $d222
+
+	ip nexthop replace id 103 group 101/102 type resilient
+	sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+multipath6_test()
+{
+	local what=$1; shift
+	local weight1=$1; shift
+	local weight2=$1; shift
+
+	sysctl_set net.ipv6.fib_multipath_hash_policy 1
+	ip nexthop replace id 103 group 101,$weight1/102,$weight2 \
+		type resilient
+
+	local t0_111=$(tc_rule_stats_get $ul2 111 ingress)
+	local t0_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+	ip vrf exec v$h1 \
+		$MZ $h1 -6 -q -p 64 -A 2001:db8:1::1 -B 2001:db8:2::2 \
+		-d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
+
+	local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
+	local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+	local d111=$((t1_111 - t0_111))
+	local d222=$((t1_222 - t0_222))
+	multipath_eval "$what" $weight1 $weight2 $d111 $d222
+
+	ip nexthop replace id 103 group 101/102 type resilient
+	sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.18
+}
+
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:2::2
+}
+
+multipath_ipv4()
+{
+	log_info "Running IPv4 multipath tests"
+	multipath4_test "ECMP" 1 1
+	multipath4_test "Weighted MP 2:1" 2 1
+	multipath4_test "Weighted MP 11:45" 11 45
+}
+
+multipath_ipv6()
+{
+	log_info "Running IPv6 multipath tests"
+	multipath6_test "ECMP" 1 1
+	multipath6_test "Weighted MP 2:1" 2 1
+	multipath6_test "Weighted MP 11:45" 11 45
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/hw_stats_l3.sh b/tools/testing/selftests/net/forwarding/hw_stats_l3.sh
new file mode 100755
index 000000000000..48584a51388f
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/hw_stats_l3.sh
@@ -0,0 +1,340 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +--------------------+                     +----------------------+
+# | H1                 |                     |                   H2 |
+# |                    |                     |                      |
+# |          $h1.200 + |                     | + $h2.200            |
+# |     192.0.2.1/28 | |                     | | 192.0.2.18/28      |
+# | 2001:db8:1::1/64 | |                     | | 2001:db8:2::1/64   |
+# |                  | |                     | |                    |
+# |              $h1 + |                     | + $h2                |
+# |                  | |                     | |                    |
+# +------------------|-+                     +-|--------------------+
+#                    |                         |
+# +------------------|-------------------------|--------------------+
+# | SW               |                         |                    |
+# |                  |                         |                    |
+# |             $rp1 +                         + $rp2               |
+# |                  |                         |                    |
+# |         $rp1.200 +                         + $rp2.200           |
+# |     192.0.2.2/28                             192.0.2.17/28      |
+# | 2001:db8:1::2/64                             2001:db8:2::2/64   |
+# |                                                                 |
+# +-----------------------------------------------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	ping_ipv6
+	test_stats_rx_ipv4
+	test_stats_tx_ipv4
+	test_stats_rx_ipv6
+	test_stats_tx_ipv6
+	respin_enablement
+	test_stats_rx_ipv4
+	test_stats_tx_ipv4
+	test_stats_rx_ipv6
+	test_stats_tx_ipv6
+	reapply_config
+	ping_ipv4
+	ping_ipv6
+	test_stats_rx_ipv4
+	test_stats_tx_ipv4
+	test_stats_rx_ipv6
+	test_stats_tx_ipv6
+	test_stats_report_rx
+	test_stats_report_tx
+	test_destroy_enabled
+	test_double_enable
+"
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1
+	vlan_create $h1 200 v$h1 192.0.2.1/28 2001:db8:1::1/64
+	ip route add 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
+	ip -6 route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+	ip -6 route del 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2
+	ip route del 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
+	vlan_destroy $h1 200
+	simple_if_fini $h1
+}
+
+h2_create()
+{
+	simple_if_init $h2
+	vlan_create $h2 200 v$h2 192.0.2.18/28 2001:db8:2::1/64
+	ip route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.17
+	ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::2
+}
+
+h2_destroy()
+{
+	ip -6 route del 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::2
+	ip route del 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.17
+	vlan_destroy $h2 200
+	simple_if_fini $h2
+}
+
+router_rp1_200_create()
+{
+	ip link add name $rp1.200 link $rp1 type vlan id 200
+	ip link set dev $rp1.200 addrgenmode eui64
+	ip link set dev $rp1.200 up
+	ip address add dev $rp1.200 192.0.2.2/28
+	ip address add dev $rp1.200 2001:db8:1::2/64
+	ip stats set dev $rp1.200 l3_stats on
+}
+
+router_rp1_200_destroy()
+{
+	ip stats set dev $rp1.200 l3_stats off
+	ip address del dev $rp1.200 2001:db8:1::2/64
+	ip address del dev $rp1.200 192.0.2.2/28
+	ip link del dev $rp1.200
+}
+
+router_create()
+{
+	ip link set dev $rp1 up
+	router_rp1_200_create
+
+	ip link set dev $rp2 up
+	vlan_create $rp2 200 "" 192.0.2.17/28 2001:db8:2::2/64
+}
+
+router_destroy()
+{
+	vlan_destroy $rp2 200
+	ip link set dev $rp2 down
+
+	router_rp1_200_destroy
+	ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	rp1=${NETIFS[p2]}
+
+	rp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	rp1mac=$(mac_get $rp1)
+	rp2mac=$(mac_get $rp2)
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	router_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	router_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1.200 192.0.2.18 " IPv4"
+}
+
+ping_ipv6()
+{
+	ping_test $h1.200 2001:db8:2::1 " IPv6"
+}
+
+send_packets_rx_ipv4()
+{
+	# Send 21 packets instead of 20, because the first one might trap and go
+	# through the SW datapath, which might not bump the HW counter.
+	$MZ $h1.200 -c 21 -d 20msec -p 100 \
+	    -a own -b $rp1mac -A 192.0.2.1 -B 192.0.2.18 \
+	    -q -t udp sp=54321,dp=12345
+}
+
+send_packets_rx_ipv6()
+{
+	$MZ $h1.200 -6 -c 21 -d 20msec -p 100 \
+	    -a own -b $rp1mac -A 2001:db8:1::1 -B 2001:db8:2::1 \
+	    -q -t udp sp=54321,dp=12345
+}
+
+send_packets_tx_ipv4()
+{
+	$MZ $h2.200 -c 21 -d 20msec -p 100 \
+	    -a own -b $rp2mac -A 192.0.2.18 -B 192.0.2.1 \
+	    -q -t udp sp=54321,dp=12345
+}
+
+send_packets_tx_ipv6()
+{
+	$MZ $h2.200 -6 -c 21 -d 20msec -p 100 \
+	    -a own -b $rp2mac -A 2001:db8:2::1 -B 2001:db8:1::1 \
+	    -q -t udp sp=54321,dp=12345
+}
+
+___test_stats()
+{
+	local dir=$1; shift
+	local prot=$1; shift
+
+	local a
+	local b
+
+	a=$(hw_stats_get l3_stats $rp1.200 ${dir} packets)
+	send_packets_${dir}_${prot}
+	"$@"
+	b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \
+		       hw_stats_get l3_stats $rp1.200 ${dir} packets)
+	check_err $? "Traffic not reflected in the counter: $a -> $b"
+}
+
+__test_stats()
+{
+	local dir=$1; shift
+	local prot=$1; shift
+
+	RET=0
+	___test_stats "$dir" "$prot"
+	log_test "Test $dir packets: $prot"
+}
+
+test_stats_rx_ipv4()
+{
+	__test_stats rx ipv4
+}
+
+test_stats_tx_ipv4()
+{
+	__test_stats tx ipv4
+}
+
+test_stats_rx_ipv6()
+{
+	__test_stats rx ipv6
+}
+
+test_stats_tx_ipv6()
+{
+	__test_stats tx ipv6
+}
+
+# Make sure everything works well even after stats have been disabled and
+# reenabled on the same device without touching the L3 configuration.
+respin_enablement()
+{
+	log_info "Turning stats off and on again"
+	ip stats set dev $rp1.200 l3_stats off
+	ip stats set dev $rp1.200 l3_stats on
+}
+
+# For the initial run, l3_stats is enabled on a completely set up netdevice. Now
+# do it the other way around: enabling the L3 stats on an L2 netdevice, and only
+# then apply the L3 configuration.
+reapply_config()
+{
+	log_info "Reapplying configuration"
+
+	router_rp1_200_destroy
+
+	ip link add name $rp1.200 link $rp1 type vlan id 200
+	ip link set dev $rp1.200 addrgenmode none
+	ip stats set dev $rp1.200 l3_stats on
+	ip link set dev $rp1.200 addrgenmode eui64
+	ip link set dev $rp1.200 up
+	ip address add dev $rp1.200 192.0.2.2/28
+	ip address add dev $rp1.200 2001:db8:1::2/64
+}
+
+__test_stats_report()
+{
+	local dir=$1; shift
+	local prot=$1; shift
+
+	local a
+	local b
+
+	RET=0
+
+	a=$(hw_stats_get l3_stats $rp1.200 ${dir} packets)
+	send_packets_${dir}_${prot}
+	ip address flush dev $rp1.200
+	b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \
+		       hw_stats_get l3_stats $rp1.200 ${dir} packets)
+	check_err $? "Traffic not reflected in the counter: $a -> $b"
+	log_test "Test ${dir} packets: stats pushed on loss of L3"
+
+	ip stats set dev $rp1.200 l3_stats off
+	ip link del dev $rp1.200
+	router_rp1_200_create
+}
+
+test_stats_report_rx()
+{
+	__test_stats_report rx ipv4
+}
+
+test_stats_report_tx()
+{
+	__test_stats_report tx ipv4
+}
+
+test_destroy_enabled()
+{
+	RET=0
+
+	ip link del dev $rp1.200
+	router_rp1_200_create
+
+	log_test "Destroy l3_stats-enabled netdev"
+}
+
+test_double_enable()
+{
+	RET=0
+	___test_stats rx ipv4 \
+		ip stats set dev $rp1.200 l3_stats on
+	log_test "Test stat retention across a spurious enablement"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+used=$(ip -j stats show dev $rp1.200 group offload subgroup hw_stats_info |
+	   jq '.[].info.l3_stats.used')
+kind=$(ip -j -d link show dev $rp1 |
+	   jq -r '.[].linkinfo.info_kind')
+if [[ $used != true ]]; then
+	if [[ $kind == veth ]]; then
+		log_test_skip "l3_stats not offloaded on veth interface"
+		EXIT_STATUS=$ksft_skip
+	else
+		RET=1 log_test "l3_stats not offloaded"
+	fi
+else
+	tests_run
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh b/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh
new file mode 100755
index 000000000000..7594bbb49029
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh
@@ -0,0 +1,111 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test L3 stats on IP-in-IP GRE tunnel without key.
+
+# This test uses flat topology for IP tunneling tests. See ipip_lib.sh for more
+# details.
+
+ALL_TESTS="
+	ping_ipv4
+	test_stats_rx
+	test_stats_tx
+"
+NUM_NETIFS=6
+source lib.sh
+source ipip_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	ol1=${NETIFS[p2]}
+
+	ul1=${NETIFS[p3]}
+	ul2=${NETIFS[p4]}
+
+	ol2=${NETIFS[p5]}
+	h2=${NETIFS[p6]}
+
+	ol1mac=$(mac_get $ol1)
+
+	forwarding_enable
+	vrf_prepare
+	h1_create
+	h2_create
+	sw1_flat_create gre $ol1 $ul1
+	sw2_flat_create gre $ol2 $ul2
+	ip stats set dev g1a l3_stats on
+	ip stats set dev g2a l3_stats on
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ip stats set dev g1a l3_stats off
+	ip stats set dev g2a l3_stats off
+
+	sw2_flat_destroy $ol2 $ul2
+	sw1_flat_destroy $ol1 $ul1
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+	forwarding_restore
+}
+
+ping_ipv4()
+{
+	RET=0
+
+	ping_test $h1 192.0.2.18 " gre flat"
+}
+
+send_packets_ipv4()
+{
+	# Send 21 packets instead of 20, because the first one might trap and go
+	# through the SW datapath, which might not bump the HW counter.
+	$MZ $h1 -c 21 -d 20msec -p 100 \
+	    -a own -b $ol1mac -A 192.0.2.1 -B 192.0.2.18 \
+	    -q -t udp sp=54321,dp=12345
+}
+
+test_stats()
+{
+	local dev=$1; shift
+	local dir=$1; shift
+
+	local a
+	local b
+
+	RET=0
+
+	a=$(hw_stats_get l3_stats $dev $dir packets)
+	send_packets_ipv4
+	b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \
+		     hw_stats_get l3_stats $dev $dir packets)
+	check_err $? "Traffic not reflected in the counter: $a -> $b"
+
+	log_test "Test $dir packets: $prot"
+}
+
+test_stats_tx()
+{
+	test_stats g1a tx
+}
+
+test_stats_rx()
+{
+	test_stats g2a rx
+}
+
+skip_on_veth
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh b/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh
new file mode 100755
index 000000000000..49fa94b53a1c
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh
@@ -0,0 +1,174 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test ipv6 stats on the incoming if when forwarding with VRF
+
+ALL_TESTS="
+	ipv6_ping
+	ipv6_in_too_big_err
+	ipv6_in_hdr_err
+	ipv6_in_addr_err
+	ipv6_in_discard
+"
+
+NUM_NETIFS=4
+source lib.sh
+
+require_command $TROUTE6
+
+h1_create()
+{
+	simple_if_init $h1 2001:1:1::2/64
+	ip -6 route add vrf v$h1 2001:1:2::/64 via 2001:1:1::1
+}
+
+h1_destroy()
+{
+	ip -6 route del vrf v$h1 2001:1:2::/64 via 2001:1:1::1
+	simple_if_fini $h1 2001:1:1::2/64
+}
+
+router_create()
+{
+	vrf_create router
+	__simple_if_init $rtr1 router 2001:1:1::1/64
+	__simple_if_init $rtr2 router 2001:1:2::1/64
+	mtu_set $rtr2 1280
+}
+
+router_destroy()
+{
+	mtu_restore $rtr2
+	__simple_if_fini $rtr2 2001:1:2::1/64
+	__simple_if_fini $rtr1 2001:1:1::1/64
+	vrf_destroy router
+}
+
+h2_create()
+{
+	simple_if_init $h2 2001:1:2::2/64
+	ip -6 route add vrf v$h2 2001:1:1::/64 via 2001:1:2::1
+	mtu_set $h2 1280
+}
+
+h2_destroy()
+{
+	mtu_restore $h2
+	ip -6 route del vrf v$h2 2001:1:1::/64 via 2001:1:2::1
+	simple_if_fini $h2 2001:1:2::2/64
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	rtr1=${NETIFS[p2]}
+
+	rtr2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+	h1_create
+	router_create
+	h2_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	h2_destroy
+	router_destroy
+	h1_destroy
+	vrf_cleanup
+}
+
+ipv6_in_too_big_err()
+{
+	RET=0
+
+	local t0=$(ipv6_stats_get $rtr1 Ip6InTooBigErrors)
+	local vrf_name=$(master_name_get $h1)
+
+	# Send too big packets
+	ip vrf exec $vrf_name \
+		$PING6 -s 1300 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null
+
+	local t1=$(ipv6_stats_get $rtr1 Ip6InTooBigErrors)
+	test "$((t1 - t0))" -ne 0
+	check_err $?
+	log_test "Ip6InTooBigErrors"
+}
+
+ipv6_in_hdr_err()
+{
+	RET=0
+
+	local t0=$(ipv6_stats_get $rtr1 Ip6InHdrErrors)
+	local vrf_name=$(master_name_get $h1)
+
+	# Send packets with hop limit 1, easiest with traceroute6 as some ping6
+	# doesn't allow hop limit to be specified
+	ip vrf exec $vrf_name \
+		$TROUTE6 2001:1:2::2 &> /dev/null
+
+	local t1=$(ipv6_stats_get $rtr1 Ip6InHdrErrors)
+	test "$((t1 - t0))" -ne 0
+	check_err $?
+	log_test "Ip6InHdrErrors"
+}
+
+ipv6_in_addr_err()
+{
+	RET=0
+
+	local t0=$(ipv6_stats_get $rtr1 Ip6InAddrErrors)
+	local vrf_name=$(master_name_get $h1)
+
+	# Disable forwarding temporary while sending the packet
+	sysctl -qw net.ipv6.conf.all.forwarding=0
+	ip vrf exec $vrf_name \
+		$PING6 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null
+	sysctl -qw net.ipv6.conf.all.forwarding=1
+
+	local t1=$(ipv6_stats_get $rtr1 Ip6InAddrErrors)
+	test "$((t1 - t0))" -ne 0
+	check_err $?
+	log_test "Ip6InAddrErrors"
+}
+
+ipv6_in_discard()
+{
+	RET=0
+
+	local t0=$(ipv6_stats_get $rtr1 Ip6InDiscards)
+	local vrf_name=$(master_name_get $h1)
+
+	# Add a policy to discard
+	ip xfrm policy add dst 2001:1:2::2/128 dir fwd action block
+	ip vrf exec $vrf_name \
+		$PING6 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null
+	ip xfrm policy del dst 2001:1:2::2/128 dir fwd
+
+	local t1=$(ipv6_stats_get $rtr1 Ip6InDiscards)
+	test "$((t1 - t0))" -ne 0
+	check_err $?
+	log_test "Ip6InDiscards"
+}
+ipv6_ping()
+{
+	RET=0
+
+	ping6_test $h1 2001:1:2::2
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh
new file mode 100755
index 000000000000..2ab9eaaa5532
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh
@@ -0,0 +1,466 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test traffic distribution when there are multiple paths between an IPv6 GRE
+# tunnel. The tunnel carries IPv4 and IPv6 traffic between multiple hosts.
+# Multiple routes are in the underlay network. With the default multipath
+# policy, SW2 will only look at the outer IP addresses, hence only a single
+# route would be used.
+#
+# +--------------------------------+
+# | H1                             |
+# |                     $h1 +      |
+# |   198.51.100.{2-253}/24 |      |
+# |   2001:db8:1::{2-fd}/64 |      |
+# +-------------------------|------+
+#                           |
+# +-------------------------|-------------------+
+# | SW1                     |                   |
+# |                    $ol1 +                   |
+# |         198.51.100.1/24                     |
+# |        2001:db8:1::1/64                     |
+# |                                             |
+# |+ g1 (ip6gre)                                |
+# |  loc=2001:db8:3::1                          |
+# |  rem=2001:db8:3::2 -.                       |
+# |     tos=inherit     |                       |
+# |                     v                       |
+# |                     + $ul1                  |
+# |                     | 2001:db8:10::1/64     |
+# +---------------------|-----------------------+
+#                       |
+# +---------------------|-----------------------+
+# | SW2                 |                       |
+# |               $ul21 +                       |
+# |   2001:db8:10::2/64 |                       |
+# |                     |                       |
+# !   __________________+___                    |
+# |  /                      \                   |
+# |  |                      |                   |
+# |  + $ul22.111 (vlan)     + $ul22.222 (vlan)  |
+# |  | 2001:db8:11::1/64    | 2001:db8:12::1/64 |
+# |  |                      |                   |
+# +--|----------------------|-------------------+
+#    |                      |
+# +--|----------------------|-------------------+
+# |  |                      |                   |
+# |  + $ul32.111 (vlan)     + $ul32.222 (vlan)  |
+# |  | 2001:db8:11::2/64    | 2001:db8:12::2/64 |
+# |  |                      |                   |
+# |  \__________________+___/                   |
+# |                     |                       |
+# |                     |                       |
+# |               $ul31 +                       |
+# |   2001:db8:13::1/64 |                   SW3 |
+# +---------------------|-----------------------+
+#                       |
+# +---------------------|-----------------------+
+# |                     + $ul4                  |
+# |                     ^ 2001:db8:13::2/64     |
+# |                     |                       |
+# |+ g2 (ip6gre)        |                       |
+# |  loc=2001:db8:3::2  |                       |
+# |  rem=2001:db8:3::1 -'                       |
+# |  tos=inherit                                |
+# |                                             |
+# |                    $ol4 +                   |
+# |          203.0.113.1/24 |                   |
+# |        2001:db8:2::1/64 |               SW4 |
+# +-------------------------|-------------------+
+#                           |
+# +-------------------------|------+
+# |                         |      |
+# |                     $h2 +      |
+# |    203.0.113.{2-253}/24        |
+# |   2001:db8:2::{2-fd}/64     H2 |
+# +--------------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	ping_ipv6
+	custom_hash
+"
+
+NUM_NETIFS=10
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 198.51.100.2/24 2001:db8:1::2/64
+	ip route add vrf v$h1 default via 198.51.100.1 dev $h1
+	ip -6 route add vrf v$h1 default via 2001:db8:1::1 dev $h1
+}
+
+h1_destroy()
+{
+	ip -6 route del vrf v$h1 default
+	ip route del vrf v$h1 default
+	simple_if_fini $h1 198.51.100.2/24 2001:db8:1::2/64
+}
+
+sw1_create()
+{
+	simple_if_init $ol1 198.51.100.1/24 2001:db8:1::1/64
+	__simple_if_init $ul1 v$ol1 2001:db8:10::1/64
+
+	tunnel_create g1 ip6gre 2001:db8:3::1 2001:db8:3::2 tos inherit \
+		dev v$ol1
+	__simple_if_init g1 v$ol1 2001:db8:3::1/128
+	ip route add vrf v$ol1 2001:db8:3::2/128 via 2001:db8:10::2
+
+	ip route add vrf v$ol1 203.0.113.0/24 dev g1
+	ip -6 route add vrf v$ol1 2001:db8:2::/64 dev g1
+}
+
+sw1_destroy()
+{
+	ip -6 route del vrf v$ol1 2001:db8:2::/64
+	ip route del vrf v$ol1 203.0.113.0/24
+
+	ip route del vrf v$ol1 2001:db8:3::2/128
+	__simple_if_fini g1 2001:db8:3::1/128
+	tunnel_destroy g1
+
+	__simple_if_fini $ul1 2001:db8:10::1/64
+	simple_if_fini $ol1 198.51.100.1/24 2001:db8:1::1/64
+}
+
+sw2_create()
+{
+	simple_if_init $ul21 2001:db8:10::2/64
+	__simple_if_init $ul22 v$ul21
+	vlan_create $ul22 111 v$ul21 2001:db8:11::1/64
+	vlan_create $ul22 222 v$ul21 2001:db8:12::1/64
+
+	ip -6 route add vrf v$ul21 2001:db8:3::1/128 via 2001:db8:10::1
+	ip -6 route add vrf v$ul21 2001:db8:3::2/128 \
+	   nexthop via 2001:db8:11::2 \
+	   nexthop via 2001:db8:12::2
+}
+
+sw2_destroy()
+{
+	ip -6 route del vrf v$ul21 2001:db8:3::2/128
+	ip -6 route del vrf v$ul21 2001:db8:3::1/128
+
+	vlan_destroy $ul22 222
+	vlan_destroy $ul22 111
+	__simple_if_fini $ul22
+	simple_if_fini $ul21 2001:db8:10::2/64
+}
+
+sw3_create()
+{
+	simple_if_init $ul31 2001:db8:13::1/64
+	__simple_if_init $ul32 v$ul31
+	vlan_create $ul32 111 v$ul31 2001:db8:11::2/64
+	vlan_create $ul32 222 v$ul31 2001:db8:12::2/64
+
+	ip -6 route add vrf v$ul31 2001:db8:3::2/128 via 2001:db8:13::2
+	ip -6 route add vrf v$ul31 2001:db8:3::1/128 \
+	   nexthop via 2001:db8:11::1 \
+	   nexthop via 2001:db8:12::1
+
+	tc qdisc add dev $ul32 clsact
+	tc filter add dev $ul32 ingress pref 111 prot 802.1Q \
+	   flower vlan_id 111 action pass
+	tc filter add dev $ul32 ingress pref 222 prot 802.1Q \
+	   flower vlan_id 222 action pass
+}
+
+sw3_destroy()
+{
+	tc qdisc del dev $ul32 clsact
+
+	ip -6 route del vrf v$ul31 2001:db8:3::1/128
+	ip -6 route del vrf v$ul31 2001:db8:3::2/128
+
+	vlan_destroy $ul32 222
+	vlan_destroy $ul32 111
+	__simple_if_fini $ul32
+	simple_if_fini $ul31 2001:db8:13::1/64
+}
+
+sw4_create()
+{
+	simple_if_init $ol4 203.0.113.1/24 2001:db8:2::1/64
+	__simple_if_init $ul4 v$ol4 2001:db8:13::2/64
+
+	tunnel_create g2 ip6gre 2001:db8:3::2 2001:db8:3::1 tos inherit \
+		dev v$ol4
+	__simple_if_init g2 v$ol4 2001:db8:3::2/128
+	ip -6 route add vrf v$ol4 2001:db8:3::1/128 via 2001:db8:13::1
+
+	ip route add vrf v$ol4 198.51.100.0/24 dev g2
+	ip -6 route add vrf v$ol4 2001:db8:1::/64 dev g2
+}
+
+sw4_destroy()
+{
+	ip -6 route del vrf v$ol4 2001:db8:1::/64
+	ip route del vrf v$ol4 198.51.100.0/24
+
+	ip -6 route del vrf v$ol4 2001:db8:3::1/128
+	__simple_if_fini g2 2001:db8:3::2/128
+	tunnel_destroy g2
+
+	__simple_if_fini $ul4 2001:db8:13::2/64
+	simple_if_fini $ol4 203.0.113.1/24 2001:db8:2::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 203.0.113.2/24 2001:db8:2::2/64
+	ip route add vrf v$h2 default via 203.0.113.1 dev $h2
+	ip -6 route add vrf v$h2 default via 2001:db8:2::1 dev $h2
+}
+
+h2_destroy()
+{
+	ip -6 route del vrf v$h2 default
+	ip route del vrf v$h2 default
+	simple_if_fini $h2 203.0.113.2/24 2001:db8:2::2/64
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+
+	ol1=${NETIFS[p2]}
+	ul1=${NETIFS[p3]}
+
+	ul21=${NETIFS[p4]}
+	ul22=${NETIFS[p5]}
+
+	ul32=${NETIFS[p6]}
+	ul31=${NETIFS[p7]}
+
+	ul4=${NETIFS[p8]}
+	ol4=${NETIFS[p9]}
+
+	h2=${NETIFS[p10]}
+
+	vrf_prepare
+	h1_create
+	sw1_create
+	sw2_create
+	sw3_create
+	sw4_create
+	h2_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	h2_destroy
+	sw4_destroy
+	sw3_destroy
+	sw2_destroy
+	sw1_destroy
+	h1_destroy
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 203.0.113.2
+}
+
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:2::2
+}
+
+send_src_ipv4()
+{
+	ip vrf exec v$h1 $MZ $h1 -q -p 64 \
+		-A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \
+		-d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_dst_ipv4()
+{
+	ip vrf exec v$h1 $MZ $h1 -q -p 64 \
+		-A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \
+		-d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_src_udp4()
+{
+	ip vrf exec v$h1 $MZ $h1 -q -p 64 \
+		-A 198.51.100.2 -B 203.0.113.2 \
+		-d $MZ_DELAY -t udp "sp=0-32768,dp=30000"
+}
+
+send_dst_udp4()
+{
+	ip vrf exec v$h1 $MZ $h1 -q -p 64 \
+		-A 198.51.100.2 -B 203.0.113.2 \
+		-d $MZ_DELAY -t udp "sp=20000,dp=0-32768"
+}
+
+send_src_ipv6()
+{
+	ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
+		-A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:2::2 \
+		-d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_dst_ipv6()
+{
+	ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
+		-A 2001:db8:1::2 -B "2001:db8:2::2-2001:db8:2::fd" \
+		-d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_flowlabel()
+{
+	# Generate 16384 echo requests, each with a random flow label.
+	for _ in $(seq 1 16384); do
+		ip vrf exec v$h1 \
+			$PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1
+	done
+}
+
+send_src_udp6()
+{
+	ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
+		-A 2001:db8:1::2 -B 2001:db8:2::2 \
+		-d $MZ_DELAY -t udp "sp=0-32768,dp=30000"
+}
+
+send_dst_udp6()
+{
+	ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
+		-A 2001:db8:1::2 -B 2001:db8:2::2 \
+		-d $MZ_DELAY -t udp "sp=20000,dp=0-32768"
+}
+
+custom_hash_test()
+{
+	local field="$1"; shift
+	local balanced="$1"; shift
+	local send_flows="$@"
+
+	RET=0
+
+	local t0_111=$(tc_rule_stats_get $ul32 111 ingress)
+	local t0_222=$(tc_rule_stats_get $ul32 222 ingress)
+
+	$send_flows
+
+	local t1_111=$(tc_rule_stats_get $ul32 111 ingress)
+	local t1_222=$(tc_rule_stats_get $ul32 222 ingress)
+
+	local d111=$((t1_111 - t0_111))
+	local d222=$((t1_222 - t0_222))
+
+	local diff=$((d222 - d111))
+	local sum=$((d111 + d222))
+
+	local pct=$(echo "$diff / $sum * 100" | bc -l)
+	local is_balanced=$(echo "-20 <= $pct && $pct <= 20" | bc)
+
+	[[ ( $is_balanced -eq 1 && $balanced == "balanced" ) ||
+	   ( $is_balanced -eq 0 && $balanced == "unbalanced" ) ]]
+	check_err $? "Expected traffic to be $balanced, but it is not"
+
+	log_test "Multipath hash field: $field ($balanced)"
+	log_info "Packets sent on path1 / path2: $d111 / $d222"
+}
+
+custom_hash_v4()
+{
+	log_info "Running IPv4 overlay custom multipath hash tests"
+
+	# Prevent the neighbour table from overflowing, as different neighbour
+	# entries will be created on $ol4 when using different destination IPs.
+	sysctl_set net.ipv4.neigh.default.gc_thresh1 1024
+	sysctl_set net.ipv4.neigh.default.gc_thresh2 1024
+	sysctl_set net.ipv4.neigh.default.gc_thresh3 1024
+
+	sysctl_set net.ipv6.fib_multipath_hash_fields 0x0040
+	custom_hash_test "Inner source IP" "balanced" send_src_ipv4
+	custom_hash_test "Inner source IP" "unbalanced" send_dst_ipv4
+
+	sysctl_set net.ipv6.fib_multipath_hash_fields 0x0080
+	custom_hash_test "Inner destination IP" "balanced" send_dst_ipv4
+	custom_hash_test "Inner destination IP" "unbalanced" send_src_ipv4
+
+	sysctl_set net.ipv6.fib_multipath_hash_fields 0x0400
+	custom_hash_test "Inner source port" "balanced" send_src_udp4
+	custom_hash_test "Inner source port" "unbalanced" send_dst_udp4
+
+	sysctl_set net.ipv6.fib_multipath_hash_fields 0x0800
+	custom_hash_test "Inner destination port" "balanced" send_dst_udp4
+	custom_hash_test "Inner destination port" "unbalanced" send_src_udp4
+
+	sysctl_restore net.ipv4.neigh.default.gc_thresh3
+	sysctl_restore net.ipv4.neigh.default.gc_thresh2
+	sysctl_restore net.ipv4.neigh.default.gc_thresh1
+}
+
+custom_hash_v6()
+{
+	log_info "Running IPv6 overlay custom multipath hash tests"
+
+	# Prevent the neighbour table from overflowing, as different neighbour
+	# entries will be created on $ol4 when using different destination IPs.
+	sysctl_set net.ipv6.neigh.default.gc_thresh1 1024
+	sysctl_set net.ipv6.neigh.default.gc_thresh2 1024
+	sysctl_set net.ipv6.neigh.default.gc_thresh3 1024
+
+	sysctl_set net.ipv6.fib_multipath_hash_fields 0x0040
+	custom_hash_test "Inner source IP" "balanced" send_src_ipv6
+	custom_hash_test "Inner source IP" "unbalanced" send_dst_ipv6
+
+	sysctl_set net.ipv6.fib_multipath_hash_fields 0x0080
+	custom_hash_test "Inner destination IP" "balanced" send_dst_ipv6
+	custom_hash_test "Inner destination IP" "unbalanced" send_src_ipv6
+
+	sysctl_set net.ipv6.fib_multipath_hash_fields 0x0200
+	custom_hash_test "Inner flowlabel" "balanced" send_flowlabel
+	custom_hash_test "Inner flowlabel" "unbalanced" send_src_ipv6
+
+	sysctl_set net.ipv6.fib_multipath_hash_fields 0x0400
+	custom_hash_test "Inner source port" "balanced" send_src_udp6
+	custom_hash_test "Inner source port" "unbalanced" send_dst_udp6
+
+	sysctl_set net.ipv6.fib_multipath_hash_fields 0x0800
+	custom_hash_test "Inner destination port" "balanced" send_dst_udp6
+	custom_hash_test "Inner destination port" "unbalanced" send_src_udp6
+
+	sysctl_restore net.ipv6.neigh.default.gc_thresh3
+	sysctl_restore net.ipv6.neigh.default.gc_thresh2
+	sysctl_restore net.ipv6.neigh.default.gc_thresh1
+}
+
+custom_hash()
+{
+	# Test that when the hash policy is set to custom, traffic is
+	# distributed only according to the fields set in the
+	# fib_multipath_hash_fields sysctl.
+	#
+	# Each time set a different field and make sure traffic is only
+	# distributed when the field is changed in the packet stream.
+
+	sysctl_set net.ipv6.fib_multipath_hash_policy 3
+
+	custom_hash_v4
+	custom_hash_v6
+
+	sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_flat.sh b/tools/testing/selftests/net/forwarding/ip6gre_flat.sh
new file mode 100755
index 000000000000..96c97064f2d3
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ip6gre_flat.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test IP-in-IP GRE tunnel without key.
+# This test uses flat topology for IP tunneling tests. See ip6gre_lib.sh for
+# more details.
+
+ALL_TESTS="
+	gre_flat
+	gre_mtu_change
+"
+
+NUM_NETIFS=6
+source lib.sh
+source ip6gre_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	ol1=${NETIFS[p2]}
+
+	ul1=${NETIFS[p3]}
+	ul2=${NETIFS[p4]}
+
+	ol2=${NETIFS[p5]}
+	h2=${NETIFS[p6]}
+
+	forwarding_enable
+	vrf_prepare
+	h1_create
+	h2_create
+	sw1_flat_create $ol1 $ul1
+	sw2_flat_create $ol2 $ul2
+}
+
+gre_flat()
+{
+	test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6"
+	test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6"
+}
+
+gre_mtu_change()
+{
+	test_mtu_change
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	sw2_flat_destroy $ol2 $ul2
+	sw1_flat_destroy $ol1 $ul1
+	h2_destroy
+	h1_destroy
+	vrf_cleanup
+	forwarding_restore
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_flat_key.sh b/tools/testing/selftests/net/forwarding/ip6gre_flat_key.sh
new file mode 100755
index 000000000000..ff9fb0db9bd1
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ip6gre_flat_key.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test IP-in-IP GRE tunnel with key.
+# This test uses flat topology for IP tunneling tests. See ip6gre_lib.sh for
+# more details.
+
+ALL_TESTS="
+	gre_flat
+	gre_mtu_change
+"
+
+NUM_NETIFS=6
+source lib.sh
+source ip6gre_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	ol1=${NETIFS[p2]}
+
+	ul1=${NETIFS[p3]}
+	ul2=${NETIFS[p4]}
+
+	ol2=${NETIFS[p5]}
+	h2=${NETIFS[p6]}
+
+	forwarding_enable
+	vrf_prepare
+	h1_create
+	h2_create
+	sw1_flat_create $ol1 $ul1 key 233
+	sw2_flat_create $ol2 $ul2 key 233
+}
+
+gre_flat()
+{
+	test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6 with key"
+	test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6 with key"
+}
+
+gre_mtu_change()
+{
+	test_mtu_change
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	sw2_flat_destroy $ol2 $ul2
+	sw1_flat_destroy $ol1 $ul1
+	h2_destroy
+	h1_destroy
+	vrf_cleanup
+	forwarding_restore
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_flat_keys.sh b/tools/testing/selftests/net/forwarding/ip6gre_flat_keys.sh
new file mode 100755
index 000000000000..12c138785242
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ip6gre_flat_keys.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test IP-in-IP GRE tunnel with keys.
+# This test uses flat topology for IP tunneling tests. See ip6gre_lib.sh for
+# more details.
+
+ALL_TESTS="
+	gre_flat
+	gre_mtu_change
+"
+
+NUM_NETIFS=6
+source lib.sh
+source ip6gre_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	ol1=${NETIFS[p2]}
+
+	ul1=${NETIFS[p3]}
+	ul2=${NETIFS[p4]}
+
+	ol2=${NETIFS[p5]}
+	h2=${NETIFS[p6]}
+
+	forwarding_enable
+	vrf_prepare
+	h1_create
+	h2_create
+	sw1_flat_create $ol1 $ul1 ikey 111 okey 222
+	sw2_flat_create $ol2 $ul2 ikey 222 okey 111
+}
+
+gre_flat()
+{
+	test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6 with ikey/okey"
+	test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6 with ikey/okey"
+}
+
+gre_mtu_change()
+{
+	test_mtu_change	gre
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	sw2_flat_destroy $ol2 $ul2
+	sw1_flat_destroy $ol1 $ul1
+	h2_destroy
+	h1_destroy
+	vrf_cleanup
+	forwarding_restore
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_hier.sh b/tools/testing/selftests/net/forwarding/ip6gre_hier.sh
new file mode 100755
index 000000000000..83b55c30a5c3
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ip6gre_hier.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test IP-in-IP GRE tunnels without key.
+# This test uses hierarchical topology for IP tunneling tests. See
+# ip6gre_lib.sh for more details.
+
+ALL_TESTS="
+	gre_hier
+	gre_mtu_change
+"
+
+NUM_NETIFS=6
+source lib.sh
+source ip6gre_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	ol1=${NETIFS[p2]}
+
+	ul1=${NETIFS[p3]}
+	ul2=${NETIFS[p4]}
+
+	ol2=${NETIFS[p5]}
+	h2=${NETIFS[p6]}
+
+	forwarding_enable
+	vrf_prepare
+	h1_create
+	h2_create
+	sw1_hierarchical_create $ol1 $ul1
+	sw2_hierarchical_create $ol2 $ul2
+}
+
+gre_hier()
+{
+	test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6"
+	test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6"
+}
+
+gre_mtu_change()
+{
+	test_mtu_change gre
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	sw2_hierarchical_destroy $ol2 $ul2
+	sw1_hierarchical_destroy $ol1 $ul1
+	h2_destroy
+	h1_destroy
+	vrf_cleanup
+	forwarding_restore
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_hier_key.sh b/tools/testing/selftests/net/forwarding/ip6gre_hier_key.sh
new file mode 100755
index 000000000000..256607916d92
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ip6gre_hier_key.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test IP-in-IP GRE tunnels without key.
+# This test uses hierarchical topology for IP tunneling tests. See
+# ip6gre_lib.sh for more details.
+
+ALL_TESTS="
+	gre_hier
+	gre_mtu_change
+"
+
+NUM_NETIFS=6
+source lib.sh
+source ip6gre_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	ol1=${NETIFS[p2]}
+
+	ul1=${NETIFS[p3]}
+	ul2=${NETIFS[p4]}
+
+	ol2=${NETIFS[p5]}
+	h2=${NETIFS[p6]}
+
+	forwarding_enable
+	vrf_prepare
+	h1_create
+	h2_create
+	sw1_hierarchical_create $ol1 $ul1 key 22
+	sw2_hierarchical_create $ol2 $ul2 key 22
+}
+
+gre_hier()
+{
+	test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6 with key"
+	test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6 with key"
+}
+
+gre_mtu_change()
+{
+	test_mtu_change gre
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	sw2_hierarchical_destroy $ol2 $ul2
+	sw1_hierarchical_destroy $ol1 $ul1
+	h2_destroy
+	h1_destroy
+	vrf_cleanup
+	forwarding_restore
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_hier_keys.sh b/tools/testing/selftests/net/forwarding/ip6gre_hier_keys.sh
new file mode 100755
index 000000000000..ad1bcd6334a8
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ip6gre_hier_keys.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test IP-in-IP GRE tunnels without key.
+# This test uses hierarchical topology for IP tunneling tests. See
+# ip6gre_lib.sh for more details.
+
+ALL_TESTS="
+	gre_hier
+	gre_mtu_change
+"
+
+NUM_NETIFS=6
+source lib.sh
+source ip6gre_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	ol1=${NETIFS[p2]}
+
+	ul1=${NETIFS[p3]}
+	ul2=${NETIFS[p4]}
+
+	ol2=${NETIFS[p5]}
+	h2=${NETIFS[p6]}
+
+	forwarding_enable
+	vrf_prepare
+	h1_create
+	h2_create
+	sw1_hierarchical_create $ol1 $ul1 ikey 111 okey 222
+	sw2_hierarchical_create $ol2 $ul2 ikey 222 okey 111
+}
+
+gre_hier()
+{
+	test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6 with ikey/okey"
+	test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6 with ikey/okey"
+}
+
+gre_mtu_change()
+{
+	test_mtu_change gre
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	sw2_hierarchical_destroy $ol2 $ul2
+	sw1_hierarchical_destroy $ol1 $ul1
+	h2_destroy
+	h1_destroy
+	vrf_cleanup
+	forwarding_restore
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_inner_v4_multipath.sh b/tools/testing/selftests/net/forwarding/ip6gre_inner_v4_multipath.sh
index a257979d3fc5..32d1461f37b7 100755
--- a/tools/testing/selftests/net/forwarding/ip6gre_inner_v4_multipath.sh
+++ b/tools/testing/selftests/net/forwarding/ip6gre_inner_v4_multipath.sh
@@ -266,7 +266,7 @@ multipath4_test()
 
 	ip vrf exec v$h1 \
 	   $MZ $h1 -q -p 64 -A "192.0.3.2-192.0.3.62" -B "192.0.4.2-192.0.4.62" \
-	       -d 1msec -c 50 -t udp "sp=1024,dp=1024"
+	       -d $MZ_DELAY -c 50 -t udp "sp=1024,dp=1024"
 	sleep 1
 
 	local t1_111=$(tc_rule_stats_get $ul32 111 ingress)
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh b/tools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh
index d208f5243ade..e1a4b50505f5 100755
--- a/tools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh
+++ b/tools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh
@@ -265,9 +265,9 @@ multipath6_test()
 	local t0_222=$(tc_rule_stats_get $ul32 222 ingress)
 
 	ip vrf exec v$h1 \
-	   $MZ $h1 -6 -q -p 64 -A "2001:db8:1::2-2001:db8:1::1e" \
-	       -B "2001:db8:2::2-2001:db8:2::1e" \
-	       -d 1msec -c 50 -t udp "sp=1024,dp=1024"
+	   $MZ $h1 -6 -q -p 64 -A "2001:db8:1::2-2001:db8:1::3e" \
+	       -B "2001:db8:2::2-2001:db8:2::3e" \
+	       -d $MZ_DELAY -c 50 -t udp "sp=1024,dp=1024"
 	sleep 1
 
 	local t1_111=$(tc_rule_stats_get $ul32 111 ingress)
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_lib.sh b/tools/testing/selftests/net/forwarding/ip6gre_lib.sh
new file mode 100644
index 000000000000..24f4ab328bd2
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ip6gre_lib.sh
@@ -0,0 +1,438 @@
+# SPDX-License-Identifier: GPL-2.0
+#!/bin/bash
+
+# Handles creation and destruction of IP-in-IP or GRE tunnels over the given
+# topology. Supports both flat and hierarchical models.
+#
+# Flat Model:
+# Overlay and underlay share the same VRF.
+# SW1 uses default VRF so tunnel has no bound dev.
+# SW2 uses non-default VRF tunnel has a bound dev.
+# +--------------------------------+
+# | H1                             |
+# |                     $h1 +      |
+# |        198.51.100.1/24  |      |
+# |        2001:db8:1::1/64 |      |
+# +-------------------------|------+
+#                           |
+# +-------------------------|-------------------+
+# | SW1                     |                   |
+# |                    $ol1 +                   |
+# |        198.51.100.2/24                      |
+# |        2001:db8:1::2/64                     |
+# |                                             |
+# |      + g1a (ip6gre)                         |
+# |        loc=2001:db8:3::1                    |
+# |        rem=2001:db8:3::2 --.                |
+# |        tos=inherit         |                |
+# |                            .                |
+# |      .---------------------                 |
+# |      |                                      |
+# |      v                                      |
+# |      + $ul1.111 (vlan)                      |
+# |      | 2001:db8:10::1/64                    |
+# |       \                                     |
+# |        \____________                        |
+# |                     |                       |
+# | VRF default         + $ul1                  |
+# +---------------------|-----------------------+
+#                       |
+# +---------------------|-----------------------+
+# | SW2                 |                       |
+# |                $ul2 +                       |
+# |          ___________|                       |
+# |         /                                   |
+# |        /                                    |
+# |       + $ul2.111 (vlan)                     |
+# |       ^ 2001:db8:10::2/64                   |
+# |       |                                     |
+# |       |                                     |
+# |       '----------------------.              |
+# |       + g2a (ip6gre)         |              |
+# |         loc=2001:db8:3::2    |              |
+# |         rem=2001:db8:3::1  --'              |
+# |         tos=inherit                         |
+# |                                             |
+# |                     + $ol2                  |
+# |                     | 203.0.113.2/24        |
+# | VRF v$ol2           | 2001:db8:2::2/64      |
+# +---------------------|-----------------------+
+# +---------------------|----------+
+# | H2                  |          |
+# |                 $h2 +          |
+# |    203.0.113.1/24              |
+# |    2001:db8:2::1/64            |
+# +--------------------------------+
+#
+# Hierarchical model:
+# The tunnel is bound to a device in a different VRF
+#
+# +--------------------------------+
+# | H1                             |
+# |                     $h1 +      |
+# |        198.51.100.1/24  |      |
+# |        2001:db8:1::1/64 |      |
+# +-------------------------|------+
+#                           |
+# +-------------------------|-------------------+
+# | SW1                     |                   |
+# | +-----------------------|-----------------+ |
+# | |                  $ol1 +                 | |
+# | |      198.51.100.2/24                    | |
+# | |      2001:db8:1::2/64                   | |
+# | |                                         | |
+# | |              + g1a (ip6gre)             | |
+# | |                loc=2001:db8:3::1        | |
+# | |                rem=2001:db8:3::2        | |
+# | |                tos=inherit              | |
+# | |                    ^                    | |
+# | |   VRF v$ol1        |                    | |
+# | +--------------------|--------------------+ |
+# |                      |                      |
+# | +--------------------|--------------------+ |
+# | |   VRF v$ul1        |                    | |
+# | |                    |                    | |
+# | |                    v                    | |
+# | |             dummy1 +                    | |
+# | |       2001:db8:3::1/64                  | |
+# | |         .-----------'                   | |
+# | |         |                               | |
+# | |         v                               | |
+# | |         + $ul1.111 (vlan)               | |
+# | |         | 2001:db8:10::1/64             | |
+# | |         \                               | |
+# | |          \__________                    | |
+# | |                     |                   | |
+# | |                     + $ul1              | |
+# | +---------------------|-------------------+ |
+# +-----------------------|---------------------+
+#                         |
+# +-----------------------|---------------------+
+# | SW2                   |                     |
+# | +---------------------|-------------------+ |
+# | |                     + $ul2              | |
+# | |                _____|                   | |
+# | |               /                         | |
+# | |              /                          | |
+# | |              | $ul2.111 (vlan)          | |
+# | |              + 2001:db8:10::2/64        | |
+# | |              ^                          | |
+# | |              |                          | |
+# | |              '------.                   | |
+# | |              dummy2 +                   | |
+# | |              2001:db8:3::2/64           | |
+# | |                     ^                   | |
+# | |                     |                   | |
+# | |                     |                   | |
+# | | VRF v$ul2           |                   | |
+# | +---------------------|-------------------+ |
+# |                       |                     |
+# | +---------------------|-------------------+ |
+# | | VRF v$ol2           |                   | |
+# | |                     |                   | |
+# | |                     v                   | |
+# | |        g2a (ip6gre) +                   | |
+# | |        loc=2001:db8:3::2                | |
+# | |        rem=2001:db8:3::1                | |
+# | |        tos=inherit                      | |
+# | |                                         | |
+# | |                $ol2 +                   | |
+# | |    203.0.113.2/24   |                   | |
+# | |    2001:db8:2::2/64 |                   | |
+# | +---------------------|-------------------+ |
+# +-----------------------|---------------------+
+#                         |
+# +-----------------------|--------+
+# | H2                    |        |
+# |                   $h2 +        |
+# |      203.0.113.1/24            |
+# |      2001:db8:2::1/64          |
+# +--------------------------------+
+
+source lib.sh
+source tc_common.sh
+
+h1_create()
+{
+	simple_if_init $h1 198.51.100.1/24 2001:db8:1::1/64
+	ip route add vrf v$h1 203.0.113.0/24 via 198.51.100.2
+	ip -6 route add vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+	ip -6 route del vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2
+	ip route del vrf v$h1 203.0.113.0/24 via 198.51.100.2
+	simple_if_fini $h1 198.51.100.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 203.0.113.1/24 2001:db8:2::1/64
+	ip route add vrf v$h2 198.51.100.0/24 via 203.0.113.2
+	ip -6 route add vrf v$h2 2001:db8:1::/64 via 2001:db8:2::2
+}
+
+h2_destroy()
+{
+	ip -6 route del vrf v$h2 2001:db8:1::/64 via 2001:db8:2::2
+	ip route del vrf v$h2 198.51.100.0/24 via 203.0.113.2
+	simple_if_fini $h2 203.0.113.1/24 2001:db8:2::1/64
+}
+
+sw1_flat_create()
+{
+	local ol1=$1; shift
+	local ul1=$1; shift
+
+	ip link set dev $ol1 up
+        __addr_add_del $ol1 add 198.51.100.2/24 2001:db8:1::2/64
+
+	ip link set dev $ul1 up
+	vlan_create $ul1 111 "" 2001:db8:10::1/64
+
+	tunnel_create g1a ip6gre 2001:db8:3::1 2001:db8:3::2 tos inherit \
+		ttl inherit "$@"
+	ip link set dev g1a up
+        __addr_add_del g1a add "2001:db8:3::1/128"
+
+	ip -6 route add 2001:db8:3::2/128 via 2001:db8:10::2
+	ip route add 203.0.113.0/24 dev g1a
+	ip -6 route add 2001:db8:2::/64 dev g1a
+}
+
+sw1_flat_destroy()
+{
+	local ol1=$1; shift
+	local ul1=$1; shift
+
+	ip -6 route del 2001:db8:2::/64
+	ip route del 203.0.113.0/24
+	ip -6 route del 2001:db8:3::2/128 via 2001:db8:10::2
+
+	__simple_if_fini g1a 2001:db8:3::1/128
+	tunnel_destroy g1a
+
+	vlan_destroy $ul1 111
+	__simple_if_fini $ul1
+	__simple_if_fini $ol1 198.51.100.2/24 2001:db8:1::2/64
+}
+
+sw2_flat_create()
+{
+	local ol2=$1; shift
+	local ul2=$1; shift
+
+	simple_if_init $ol2 203.0.113.2/24 2001:db8:2::2/64
+	__simple_if_init $ul2 v$ol2
+	vlan_create $ul2 111 v$ol2 2001:db8:10::2/64
+
+	tunnel_create g2a ip6gre 2001:db8:3::2 2001:db8:3::1 tos inherit \
+		ttl inherit dev v$ol2 "$@"
+	__simple_if_init g2a v$ol2 2001:db8:3::2/128
+
+	# Replace neighbor to avoid 1 dropped packet due to "unresolved neigh"
+	ip neigh replace dev $ol2 203.0.113.1 lladdr $(mac_get $h2)
+	ip -6 neigh replace dev $ol2 2001:db8:2::1 lladdr $(mac_get $h2)
+
+	ip -6 route add vrf v$ol2 2001:db8:3::1/128 via 2001:db8:10::1
+	ip route add vrf v$ol2 198.51.100.0/24 dev g2a
+	ip -6 route add vrf v$ol2 2001:db8:1::/64 dev g2a
+}
+
+sw2_flat_destroy()
+{
+	local ol2=$1; shift
+	local ul2=$1; shift
+
+	ip -6 route del vrf v$ol2 2001:db8:2::/64
+	ip route del vrf v$ol2 198.51.100.0/24
+	ip -6 route del vrf v$ol2 2001:db8:3::1/128 via 2001:db8:10::1
+
+	__simple_if_fini g2a 2001:db8:3::2/128
+	tunnel_destroy g2a
+
+	vlan_destroy $ul2 111
+	__simple_if_fini $ul2
+	simple_if_fini $ol2 203.0.113.2/24 2001:db8:2::2/64
+}
+
+sw1_hierarchical_create()
+{
+	local ol1=$1; shift
+	local ul1=$1; shift
+
+	simple_if_init $ol1 198.51.100.2/24 2001:db8:1::2/64
+	simple_if_init $ul1
+	ip link add name dummy1 type dummy
+	__simple_if_init dummy1 v$ul1 2001:db8:3::1/64
+
+	vlan_create $ul1 111 v$ul1 2001:db8:10::1/64
+	tunnel_create g1a ip6gre 2001:db8:3::1 2001:db8:3::2 tos inherit \
+		ttl inherit dev dummy1 "$@"
+	ip link set dev g1a master v$ol1
+
+	ip -6 route add vrf v$ul1 2001:db8:3::2/128 via 2001:db8:10::2
+	ip route add vrf v$ol1 203.0.113.0/24 dev g1a
+	ip -6 route add vrf v$ol1 2001:db8:2::/64 dev g1a
+}
+
+sw1_hierarchical_destroy()
+{
+	local ol1=$1; shift
+	local ul1=$1; shift
+
+	ip -6 route del vrf v$ol1 2001:db8:2::/64
+	ip route del vrf v$ol1 203.0.113.0/24
+	ip -6 route del vrf v$ul1 2001:db8:3::2/128
+
+	tunnel_destroy g1a
+	vlan_destroy $ul1 111
+
+	__simple_if_fini dummy1 2001:db8:3::1/64
+	ip link del dev dummy1
+
+	simple_if_fini $ul1
+	simple_if_fini $ol1 198.51.100.2/24 2001:db8:1::2/64
+}
+
+sw2_hierarchical_create()
+{
+	local ol2=$1; shift
+	local ul2=$1; shift
+
+	simple_if_init $ol2 203.0.113.2/24 2001:db8:2::2/64
+	simple_if_init $ul2
+
+	ip link add name dummy2 type dummy
+	__simple_if_init dummy2 v$ul2 2001:db8:3::2/64
+
+	vlan_create $ul2 111 v$ul2 2001:db8:10::2/64
+	tunnel_create g2a ip6gre 2001:db8:3::2 2001:db8:3::1 tos inherit \
+		ttl inherit dev dummy2 "$@"
+	ip link set dev g2a master v$ol2
+
+	# Replace neighbor to avoid 1 dropped packet due to "unresolved neigh"
+	ip neigh replace dev $ol2 203.0.113.1 lladdr $(mac_get $h2)
+	ip -6 neigh replace dev $ol2 2001:db8:2::1 lladdr $(mac_get $h2)
+
+	ip -6 route add vrf v$ul2 2001:db8:3::1/128 via 2001:db8:10::1
+	ip route add vrf v$ol2 198.51.100.0/24 dev g2a
+	ip -6 route add vrf v$ol2 2001:db8:1::/64 dev g2a
+}
+
+sw2_hierarchical_destroy()
+{
+	local ol2=$1; shift
+	local ul2=$1; shift
+
+	ip -6 route del vrf v$ol2 2001:db8:2::/64
+	ip route del vrf v$ol2 198.51.100.0/24
+	ip -6 route del vrf v$ul2 2001:db8:3::1/128
+
+	tunnel_destroy g2a
+	vlan_destroy $ul2 111
+
+	__simple_if_fini dummy2 2001:db8:3::2/64
+	ip link del dev dummy2
+
+	simple_if_fini $ul2
+	simple_if_fini $ol2 203.0.113.2/24 2001:db8:2::2/64
+}
+
+test_traffic_ip4ip6()
+{
+	RET=0
+
+	h1mac=$(mac_get $h1)
+	ol1mac=$(mac_get $ol1)
+
+	tc qdisc add dev $ul1 clsact
+	tc filter add dev $ul1 egress proto all pref 1 handle 101 \
+		flower $TC_FLAG action pass
+
+	tc qdisc add dev $ol2 clsact
+	tc filter add dev $ol2 egress protocol ipv4 pref 1 handle 101 \
+		flower $TC_FLAG dst_ip 203.0.113.1 action pass
+
+	$MZ $h1 -c 1000 -p 64 -a $h1mac -b $ol1mac -A 198.51.100.1 \
+		-B 203.0.113.1 -t ip -q -d $MZ_DELAY
+
+	# Check ports after encap and after decap.
+	tc_check_at_least_x_packets "dev $ul1 egress" 101 1000
+	check_err $? "Packets did not go through $ul1, tc_flag = $TC_FLAG"
+
+	tc_check_at_least_x_packets "dev $ol2 egress" 101 1000
+	check_err $? "Packets did not go through $ol2, tc_flag = $TC_FLAG"
+
+	log_test "$@"
+
+	tc filter del dev $ol2 egress protocol ipv4 pref 1 handle 101 flower
+	tc qdisc del dev $ol2 clsact
+	tc filter del dev $ul1 egress proto all pref 1 handle 101 flower
+	tc qdisc del dev $ul1 clsact
+}
+
+test_traffic_ip6ip6()
+{
+	RET=0
+
+	h1mac=$(mac_get $h1)
+	ol1mac=$(mac_get $ol1)
+
+	tc qdisc add dev $ul1 clsact
+	tc filter add dev $ul1 egress proto all pref 1 handle 101 \
+		flower $TC_FLAG action pass
+
+	tc qdisc add dev $ol2 clsact
+	tc filter add dev $ol2 egress protocol ipv6 pref 1 handle 101 \
+		flower $TC_FLAG dst_ip 2001:db8:2::1 action pass
+
+	$MZ -6 $h1 -c 1000 -p 64 -a $h1mac -b $ol1mac -A 2001:db8:1::1 \
+		-B 2001:db8:2::1 -t ip -q -d $MZ_DELAY
+
+	# Check ports after encap and after decap.
+	tc_check_at_least_x_packets "dev $ul1 egress" 101 1000
+	check_err $? "Packets did not go through $ul1, tc_flag = $TC_FLAG"
+
+	tc_check_at_least_x_packets "dev $ol2 egress" 101 1000
+	check_err $? "Packets did not go through $ol2, tc_flag = $TC_FLAG"
+
+	log_test "$@"
+
+	tc filter del dev $ol2 egress protocol ipv6 pref 1 handle 101 flower
+	tc qdisc del dev $ol2 clsact
+	tc filter del dev $ul1 egress proto all pref 1 handle 101 flower
+	tc qdisc del dev $ul1 clsact
+}
+
+topo_mtu_change()
+{
+	local mtu=$1
+
+	ip link set mtu $mtu dev $h1
+	ip link set mtu $mtu dev $ol1
+	ip link set mtu $mtu dev g1a
+	ip link set mtu $mtu dev $ul1
+	ip link set mtu $mtu dev $ul1.111
+	ip link set mtu $mtu dev $h2
+	ip link set mtu $mtu dev $ol2
+	ip link set mtu $mtu dev g2a
+	ip link set mtu $mtu dev $ul2
+	ip link set mtu $mtu dev $ul2.111
+}
+
+test_mtu_change()
+{
+	RET=0
+
+	ping6_do $h1 2001:db8:2::1 "-s 1800 -w 3"
+	check_fail $? "ping GRE IPv6 should not pass with packet size 1800"
+
+	RET=0
+
+	topo_mtu_change	2000
+	ping6_do $h1 2001:db8:2::1 "-s 1800 -w 3"
+	check_err $?
+	log_test "ping GRE IPv6, packet size 1800 after MTU change"
+}
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 927f9ba49e08..e579c2e0c462 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -8,6 +8,7 @@
 PING=${PING:=ping}
 PING6=${PING6:=ping6}
 MZ=${MZ:=mausezahn}
+MZ_DELAY=${MZ_DELAY:=0}
 ARPING=${ARPING:=arping}
 TEAMD=${TEAMD:=teamd}
 WAIT_TIME=${WAIT_TIME:=5}
@@ -17,18 +18,51 @@ NETIF_TYPE=${NETIF_TYPE:=veth}
 NETIF_CREATE=${NETIF_CREATE:=yes}
 MCD=${MCD:=smcrouted}
 MC_CLI=${MC_CLI:=smcroutectl}
+PING_COUNT=${PING_COUNT:=10}
 PING_TIMEOUT=${PING_TIMEOUT:=5}
 WAIT_TIMEOUT=${WAIT_TIMEOUT:=20}
 INTERFACE_TIMEOUT=${INTERFACE_TIMEOUT:=600}
-
-relative_path="${BASH_SOURCE%/*}"
-if [[ "$relative_path" == "${BASH_SOURCE}" ]]; then
-	relative_path="."
+LOW_AGEING_TIME=${LOW_AGEING_TIME:=1000}
+REQUIRE_JQ=${REQUIRE_JQ:=yes}
+REQUIRE_MZ=${REQUIRE_MZ:=yes}
+REQUIRE_MTOOLS=${REQUIRE_MTOOLS:=no}
+STABLE_MAC_ADDRS=${STABLE_MAC_ADDRS:=no}
+TCPDUMP_EXTRA_FLAGS=${TCPDUMP_EXTRA_FLAGS:=}
+TROUTE6=${TROUTE6:=traceroute6}
+
+net_forwarding_dir=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+if [[ -f $net_forwarding_dir/forwarding.config ]]; then
+	source "$net_forwarding_dir/forwarding.config"
 fi
 
-if [[ -f $relative_path/forwarding.config ]]; then
-	source "$relative_path/forwarding.config"
-fi
+source "$net_forwarding_dir/../lib.sh"
+
+# timeout in seconds
+slowwait()
+{
+	local timeout=$1; shift
+
+	local start_time="$(date -u +%s)"
+	while true
+	do
+		local out
+		out=$("$@")
+		local ret=$?
+		if ((!ret)); then
+			echo -n "$out"
+			return 0
+		fi
+
+		local current_time="$(date -u +%s)"
+		if ((current_time - start_time > timeout)); then
+			echo -n "$out"
+			return 1
+		fi
+
+		sleep 0.1
+	done
+}
 
 ##############################################################################
 # Sanity checks
@@ -38,7 +72,48 @@ check_tc_version()
 	tc -j &> /dev/null
 	if [[ $? -ne 0 ]]; then
 		echo "SKIP: iproute2 too old; tc is missing JSON support"
-		exit 1
+		exit $ksft_skip
+	fi
+}
+
+# Old versions of tc don't understand "mpls_uc"
+check_tc_mpls_support()
+{
+	local dev=$1; shift
+
+	tc filter add dev $dev ingress protocol mpls_uc pref 1 handle 1 \
+		matchall action pipe &> /dev/null
+	if [[ $? -ne 0 ]]; then
+		echo "SKIP: iproute2 too old; tc is missing MPLS support"
+		return $ksft_skip
+	fi
+	tc filter del dev $dev ingress protocol mpls_uc pref 1 handle 1 \
+		matchall
+}
+
+# Old versions of tc produce invalid json output for mpls lse statistics
+check_tc_mpls_lse_stats()
+{
+	local dev=$1; shift
+	local ret;
+
+	tc filter add dev $dev ingress protocol mpls_uc pref 1 handle 1 \
+		flower mpls lse depth 2                                 \
+		action continue &> /dev/null
+
+	if [[ $? -ne 0 ]]; then
+		echo "SKIP: iproute2 too old; tc-flower is missing extended MPLS support"
+		return $ksft_skip
+	fi
+
+	tc -j filter show dev $dev ingress protocol mpls_uc | jq . &> /dev/null
+	ret=$?
+	tc filter del dev $dev ingress protocol mpls_uc pref 1 handle 1 \
+		flower
+
+	if [[ $ret -ne 0 ]]; then
+		echo "SKIP: iproute2 too old; tc-flower produces invalid json output for extended MPLS filters"
+		return $ksft_skip
 	fi
 }
 
@@ -47,7 +122,7 @@ check_tc_shblock_support()
 	tc filter help 2>&1 | grep block &> /dev/null
 	if [[ $? -ne 0 ]]; then
 		echo "SKIP: iproute2 too old; tc is missing shared block support"
-		exit 1
+		exit $ksft_skip
 	fi
 }
 
@@ -56,7 +131,7 @@ check_tc_chain_support()
 	tc help 2>&1|grep chain &> /dev/null
 	if [[ $? -ne 0 ]]; then
 		echo "SKIP: iproute2 too old; tc is missing chain support"
-		exit 1
+		exit $ksft_skip
 	fi
 }
 
@@ -65,13 +140,85 @@ check_tc_action_hw_stats_support()
 	tc actions help 2>&1 | grep -q hw_stats
 	if [[ $? -ne 0 ]]; then
 		echo "SKIP: iproute2 too old; tc is missing action hw_stats support"
-		exit 1
+		exit $ksft_skip
+	fi
+}
+
+check_tc_fp_support()
+{
+	tc qdisc add dev lo mqprio help 2>&1 | grep -q "fp "
+	if [[ $? -ne 0 ]]; then
+		echo "SKIP: iproute2 too old; tc is missing frame preemption support"
+		exit $ksft_skip
+	fi
+}
+
+check_ethtool_lanes_support()
+{
+	ethtool --help 2>&1| grep lanes &> /dev/null
+	if [[ $? -ne 0 ]]; then
+		echo "SKIP: ethtool too old; it is missing lanes support"
+		exit $ksft_skip
+	fi
+}
+
+check_ethtool_mm_support()
+{
+	ethtool --help 2>&1| grep -- '--show-mm' &> /dev/null
+	if [[ $? -ne 0 ]]; then
+		echo "SKIP: ethtool too old; it is missing MAC Merge layer support"
+		exit $ksft_skip
+	fi
+}
+
+check_ethtool_counter_group_support()
+{
+	ethtool --help 2>&1| grep -- '--all-groups' &> /dev/null
+	if [[ $? -ne 0 ]]; then
+		echo "SKIP: ethtool too old; it is missing standard counter group support"
+		exit $ksft_skip
+	fi
+}
+
+check_ethtool_pmac_std_stats_support()
+{
+	local dev=$1; shift
+	local grp=$1; shift
+
+	[ 0 -ne $(ethtool --json -S $dev --all-groups --src pmac 2>/dev/null \
+		| jq ".[].\"$grp\" | length") ]
+}
+
+check_locked_port_support()
+{
+	if ! bridge -d link show | grep -q " locked"; then
+		echo "SKIP: iproute2 too old; Locked port feature not supported."
+		return $ksft_skip
+	fi
+}
+
+check_port_mab_support()
+{
+	if ! bridge -d link show | grep -q "mab"; then
+		echo "SKIP: iproute2 too old; MacAuth feature not supported."
+		return $ksft_skip
+	fi
+}
+
+skip_on_veth()
+{
+	local kind=$(ip -j -d link show dev ${NETIFS[p1]} |
+		jq -r '.[].linkinfo.info_kind')
+
+	if [[ $kind == veth ]]; then
+		echo "SKIP: Test cannot be run with veth pairs"
+		exit $ksft_skip
 	fi
 }
 
 if [[ "$(id -u)" -ne 0 ]]; then
 	echo "SKIP: need root privileges"
-	exit 0
+	exit $ksft_skip
 fi
 
 if [[ "$CHECK_TC" = "yes" ]]; then
@@ -84,16 +231,26 @@ require_command()
 
 	if [[ ! -x "$(command -v "$cmd")" ]]; then
 		echo "SKIP: $cmd not installed"
-		exit 1
+		exit $ksft_skip
 	fi
 }
 
-require_command jq
-require_command $MZ
+if [[ "$REQUIRE_JQ" = "yes" ]]; then
+	require_command jq
+fi
+if [[ "$REQUIRE_MZ" = "yes" ]]; then
+	require_command $MZ
+fi
+if [[ "$REQUIRE_MTOOLS" = "yes" ]]; then
+	# https://github.com/vladimiroltean/mtools/
+	# patched for IPv6 support
+	require_command msend
+	require_command mreceive
+fi
 
 if [[ ! -v NUM_NETIFS ]]; then
 	echo "SKIP: importer does not define \"NUM_NETIFS\""
-	exit 1
+	exit $ksft_skip
 fi
 
 ##############################################################################
@@ -121,6 +278,11 @@ create_netif_veth()
 	for ((i = 1; i <= NUM_NETIFS; ++i)); do
 		local j=$((i+1))
 
+		if [ -z ${NETIFS[p$i]} ]; then
+			echo "SKIP: Cannot create interface. Name not specified"
+			exit $ksft_skip
+		fi
+
 		ip link show dev ${NETIFS[p$i]} &> /dev/null
 		if [[ $? -ne 0 ]]; then
 			ip link add ${NETIFS[p$i]} type veth \
@@ -145,15 +307,46 @@ create_netif()
 	esac
 }
 
+declare -A MAC_ADDR_ORIG
+mac_addr_prepare()
+{
+	local new_addr=
+	local dev=
+
+	for ((i = 1; i <= NUM_NETIFS; ++i)); do
+		dev=${NETIFS[p$i]}
+		new_addr=$(printf "00:01:02:03:04:%02x" $i)
+
+		MAC_ADDR_ORIG["$dev"]=$(ip -j link show dev $dev | jq -e '.[].address')
+		# Strip quotes
+		MAC_ADDR_ORIG["$dev"]=${MAC_ADDR_ORIG["$dev"]//\"/}
+		ip link set dev $dev address $new_addr
+	done
+}
+
+mac_addr_restore()
+{
+	local dev=
+
+	for ((i = 1; i <= NUM_NETIFS; ++i)); do
+		dev=${NETIFS[p$i]}
+		ip link set dev $dev address ${MAC_ADDR_ORIG["$dev"]}
+	done
+}
+
 if [[ "$NETIF_CREATE" = "yes" ]]; then
 	create_netif
 fi
 
+if [[ "$STABLE_MAC_ADDRS" = "yes" ]]; then
+	mac_addr_prepare
+fi
+
 for ((i = 1; i <= NUM_NETIFS; ++i)); do
 	ip link show dev ${NETIFS[p$i]} &> /dev/null
 	if [[ $? -ne 0 ]]; then
 		echo "SKIP: could not find all required interfaces"
-		exit 1
+		exit $ksft_skip
 	fi
 done
 
@@ -227,6 +420,15 @@ log_test()
 	return 0
 }
 
+log_test_skip()
+{
+	local test_name=$1
+	local opt_str=$2
+
+	printf "TEST: %-60s  [SKIP]\n" "$test_name $opt_str"
+	return 0
+}
+
 log_info()
 {
 	local msg=$1
@@ -234,33 +436,24 @@ log_info()
 	echo "INFO: $msg"
 }
 
-busywait()
+not()
 {
-	local timeout=$1; shift
+	"$@"
+	[[ $? != 0 ]]
+}
 
-	local start_time="$(date -u +%s%3N)"
-	while true
-	do
-		local out
-		out=$("$@")
-		local ret=$?
-		if ((!ret)); then
-			echo -n "$out"
-			return 0
-		fi
+get_max()
+{
+	local arr=("$@")
 
-		local current_time="$(date -u +%s%3N)"
-		if ((current_time - start_time > timeout)); then
-			echo -n "$out"
-			return 1
+	max=${arr[0]}
+	for cur in ${arr[@]}; do
+		if [[ $cur -gt $max ]]; then
+			max=$cur
 		fi
 	done
-}
 
-not()
-{
-	"$@"
-	[[ $? != 0 ]]
+	echo $max
 }
 
 grep_bridge_fdb()
@@ -279,11 +472,21 @@ grep_bridge_fdb()
 	$@ | grep $addr | grep $flag "$word"
 }
 
+wait_for_port_up()
+{
+	"$@" | grep -q "Link detected: yes"
+}
+
 wait_for_offload()
 {
 	"$@" | grep -q offload
 }
 
+wait_for_trap()
+{
+	"$@" | grep -q trap
+}
+
 until_counter_is()
 {
 	local expr=$1; shift
@@ -302,6 +505,15 @@ busywait_for_counter()
 	busywait "$timeout" until_counter_is ">= $((base + delta))" "$@"
 }
 
+slowwait_for_counter()
+{
+	local timeout=$1; shift
+	local delta=$1; shift
+
+	local base=$("$@")
+	slowwait "$timeout" until_counter_is ">= $((base + delta))" "$@"
+}
+
 setup_wait_dev()
 {
 	local dev=$1; shift
@@ -374,33 +586,16 @@ cmd_jq()
 	[ ! -z "$output" ]
 }
 
-lldpad_app_wait_set()
-{
-	local dev=$1; shift
-
-	while lldptool -t -i $dev -V APP -c app | grep -Eq "pending|unknown"; do
-		echo "$dev: waiting for lldpad to push pending APP updates"
-		sleep 5
-	done
-}
-
-lldpad_app_wait_del()
-{
-	# Give lldpad a chance to push down the changes. If the device is downed
-	# too soon, the updates will be left pending. However, they will have
-	# been struck off the lldpad's DB already, so we won't be able to tell
-	# they are pending. Then on next test iteration this would cause
-	# weirdness as newly-added APP rules conflict with the old ones,
-	# sometimes getting stuck in an "unknown" state.
-	sleep 5
-}
-
 pre_cleanup()
 {
 	if [ "${PAUSE_ON_CLEANUP}" = "yes" ]; then
 		echo "Pausing before cleanup, hit any key to continue"
 		read
 	fi
+
+	if [[ "$STABLE_MAC_ADDRS" = "yes" ]]; then
+		mac_addr_restore
+	fi
 }
 
 vrf_prepare()
@@ -640,8 +835,9 @@ tc_rule_handle_stats_get()
 	local id=$1; shift
 	local handle=$1; shift
 	local selector=${1:-.packets}; shift
+	local netns=${1:-""}; shift
 
-	tc -j -s filter show $id \
+	tc $netns -j -s filter show $id \
 	    | jq ".[] | select(.options.handle == $handle) | \
 		  .options.actions[0].stats$selector"
 }
@@ -654,6 +850,17 @@ ethtool_stats_get()
 	ethtool -S $dev | grep "^ *$stat:" | head -n 1 | cut -d: -f2
 }
 
+ethtool_std_stats_get()
+{
+	local dev=$1; shift
+	local grp=$1; shift
+	local name=$1; shift
+	local src=$1; shift
+
+	ethtool --json -S $dev --groups $grp -- --src $src | \
+		jq '.[]."'"$grp"'"."'$name'"'
+}
+
 qdisc_stats_get()
 {
 	local dev=$1; shift
@@ -674,6 +881,52 @@ qdisc_parent_stats_get()
 	    | jq '.[] | select(.parent == "'"$parent"'") | '"$selector"
 }
 
+ipv6_stats_get()
+{
+	local dev=$1; shift
+	local stat=$1; shift
+
+	cat /proc/net/dev_snmp6/$dev | grep "^$stat" | cut -f2
+}
+
+hw_stats_get()
+{
+	local suite=$1; shift
+	local if_name=$1; shift
+	local dir=$1; shift
+	local stat=$1; shift
+
+	ip -j stats show dev $if_name group offload subgroup $suite |
+		jq ".[0].stats64.$dir.$stat"
+}
+
+__nh_stats_get()
+{
+	local key=$1; shift
+	local group_id=$1; shift
+	local member_id=$1; shift
+
+	ip -j -s -s nexthop show id $group_id |
+	    jq --argjson member_id "$member_id" --arg key "$key" \
+	       '.[].group_stats[] | select(.id == $member_id) | .[$key]'
+}
+
+nh_stats_get()
+{
+	local group_id=$1; shift
+	local member_id=$1; shift
+
+	__nh_stats_get packets "$group_id" "$member_id"
+}
+
+nh_stats_get_hw()
+{
+	local group_id=$1; shift
+	local member_id=$1; shift
+
+	__nh_stats_get packets_hw "$group_id" "$member_id"
+}
+
 humanize()
 {
 	local speed=$1; shift
@@ -698,6 +951,15 @@ rate()
 	echo $((8 * (t1 - t0) / interval))
 }
 
+packets_rate()
+{
+	local t0=$1; shift
+	local t1=$1; shift
+	local interval=$1; shift
+
+	echo $(((t1 - t0) / interval))
+}
+
 mac_get()
 {
 	local if_name=$1
@@ -705,6 +967,15 @@ mac_get()
 	ip -j link show dev $if_name | jq -r '.[]["address"]'
 }
 
+ipv6_lladdr_get()
+{
+	local if_name=$1
+
+	ip -j addr show dev $if_name | \
+		jq -r '.[]["addr_info"][] | select(.scope == "link").local' | \
+		head -1
+}
+
 bridge_ageing_time_get()
 {
 	local bridge=$1
@@ -723,14 +994,14 @@ sysctl_set()
 	local value=$1; shift
 
 	SYSCTL_ORIG[$key]=$(sysctl -n $key)
-	sysctl -qw $key=$value
+	sysctl -qw $key="$value"
 }
 
 sysctl_restore()
 {
 	local key=$1; shift
 
-	sysctl -qw $key=${SYSCTL_ORIG["$key"]}
+	sysctl -qw $key="${SYSCTL_ORIG[$key]}"
 }
 
 forwarding_enable()
@@ -1002,7 +1273,8 @@ ping_do()
 
 	vrf_name=$(master_name_get $if_name)
 	ip vrf exec $vrf_name \
-		$PING $args $dip -c 10 -i 0.1 -w $PING_TIMEOUT &> /dev/null
+		$PING $args $dip -c $PING_COUNT -i 0.1 \
+		-w $PING_TIMEOUT &> /dev/null
 }
 
 ping_test()
@@ -1014,6 +1286,15 @@ ping_test()
 	log_test "ping$3"
 }
 
+ping_test_fails()
+{
+	RET=0
+
+	ping_do $1 $2
+	check_fail $?
+	log_test "ping fails$3"
+}
+
 ping6_do()
 {
 	local if_name=$1
@@ -1023,7 +1304,8 @@ ping6_do()
 
 	vrf_name=$(master_name_get $if_name)
 	ip vrf exec $vrf_name \
-		$PING6 $args $dip -c 10 -i 0.1 -w $PING_TIMEOUT &> /dev/null
+		$PING6 $args $dip -c $PING_COUNT -i 0.1 \
+		-w $PING_TIMEOUT &> /dev/null
 }
 
 ping6_test()
@@ -1035,6 +1317,15 @@ ping6_test()
 	log_test "ping6$3"
 }
 
+ping6_test_fails()
+{
+	RET=0
+
+	ping6_do $1 $2
+	check_fail $?
+	log_test "ping6 fails$3"
+}
+
 learning_test()
 {
 	local bridge=$1
@@ -1055,6 +1346,7 @@ learning_test()
 	# FDB entry was installed.
 	bridge link set dev $br_port1 flood off
 
+	ip link set $host1_if promisc on
 	tc qdisc add dev $host1_if ingress
 	tc filter add dev $host1_if ingress protocol ip pref 1 handle 101 \
 		flower dst_mac $mac action drop
@@ -1065,7 +1357,7 @@ learning_test()
 	tc -j -s filter show dev $host1_if ingress \
 		| jq -e ".[] | select(.options.handle == 101) \
 		| select(.options.actions[0].stats.packets == 1)" &> /dev/null
-	check_fail $? "Packet reached second host when should not"
+	check_fail $? "Packet reached first host when should not"
 
 	$MZ $host1_if -c 1 -p 64 -a $mac -t ip -q
 	sleep 1
@@ -1104,6 +1396,7 @@ learning_test()
 
 	tc filter del dev $host1_if ingress protocol ip pref 1 handle 101 flower
 	tc qdisc del dev $host1_if ingress
+	ip link set $host1_if promisc off
 
 	bridge link set dev $br_port1 flood on
 
@@ -1121,6 +1414,7 @@ flood_test_do()
 
 	# Add an ACL on `host2_if` which will tell us whether the packet
 	# was flooded to it or not.
+	ip link set $host2_if promisc on
 	tc qdisc add dev $host2_if ingress
 	tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \
 		flower dst_mac $mac action drop
@@ -1138,6 +1432,7 @@ flood_test_do()
 
 	tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower
 	tc qdisc del dev $host2_if ingress
+	ip link set $host2_if promisc off
 
 	return $err
 }
@@ -1201,25 +1496,40 @@ flood_test()
 
 __start_traffic()
 {
+	local pktsize=$1; shift
 	local proto=$1; shift
 	local h_in=$1; shift    # Where the traffic egresses the host
 	local sip=$1; shift
 	local dip=$1; shift
 	local dmac=$1; shift
 
-	$MZ $h_in -p 8000 -A $sip -B $dip -c 0 \
+	$MZ $h_in -p $pktsize -A $sip -B $dip -c 0 \
 		-a own -b $dmac -t "$proto" -q "$@" &
 	sleep 1
 }
 
+start_traffic_pktsize()
+{
+	local pktsize=$1; shift
+
+	__start_traffic $pktsize udp "$@"
+}
+
+start_tcp_traffic_pktsize()
+{
+	local pktsize=$1; shift
+
+	__start_traffic $pktsize tcp "$@"
+}
+
 start_traffic()
 {
-	__start_traffic udp "$@"
+	start_traffic_pktsize 8000 "$@"
 }
 
 start_tcp_traffic()
 {
-	__start_traffic tcp "$@"
+	start_tcp_traffic_pktsize 8000 "$@"
 }
 
 stop_traffic()
@@ -1228,13 +1538,17 @@ stop_traffic()
 	{ kill %% && wait %%; } 2>/dev/null
 }
 
+declare -A cappid
+declare -A capfile
+declare -A capout
+
 tcpdump_start()
 {
 	local if_name=$1; shift
 	local ns=$1; shift
 
-	capfile=$(mktemp)
-	capout=$(mktemp)
+	capfile[$if_name]=$(mktemp)
+	capout[$if_name]=$(mktemp)
 
 	if [ -z $ns ]; then
 		ns_cmd=""
@@ -1248,25 +1562,485 @@ tcpdump_start()
 		capuser="-Z $SUDO_USER"
 	fi
 
-	$ns_cmd tcpdump -e -n -Q in -i $if_name \
-		-s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 &
-	cappid=$!
+	$ns_cmd tcpdump $TCPDUMP_EXTRA_FLAGS -e -n -Q in -i $if_name \
+		-s 65535 -B 32768 $capuser -w ${capfile[$if_name]} \
+		> "${capout[$if_name]}" 2>&1 &
+	cappid[$if_name]=$!
 
 	sleep 1
 }
 
 tcpdump_stop()
 {
-	$ns_cmd kill $cappid
+	local if_name=$1
+	local pid=${cappid[$if_name]}
+
+	$ns_cmd kill "$pid" && wait "$pid"
 	sleep 1
 }
 
 tcpdump_cleanup()
 {
-	rm $capfile $capout
+	local if_name=$1
+
+	rm ${capfile[$if_name]} ${capout[$if_name]}
 }
 
 tcpdump_show()
 {
-	tcpdump -e -n -r $capfile 2>&1
+	local if_name=$1
+
+	tcpdump -e -n -r ${capfile[$if_name]} 2>&1
+}
+
+# return 0 if the packet wasn't seen on host2_if or 1 if it was
+mcast_packet_test()
+{
+	local mac=$1
+	local src_ip=$2
+	local ip=$3
+	local host1_if=$4
+	local host2_if=$5
+	local seen=0
+	local tc_proto="ip"
+	local mz_v6arg=""
+
+	# basic check to see if we were passed an IPv4 address, if not assume IPv6
+	if [[ ! $ip =~ ^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$ ]]; then
+		tc_proto="ipv6"
+		mz_v6arg="-6"
+	fi
+
+	# Add an ACL on `host2_if` which will tell us whether the packet
+	# was received by it or not.
+	tc qdisc add dev $host2_if ingress
+	tc filter add dev $host2_if ingress protocol $tc_proto pref 1 handle 101 \
+		flower ip_proto udp dst_mac $mac action drop
+
+	$MZ $host1_if $mz_v6arg -c 1 -p 64 -b $mac -A $src_ip -B $ip -t udp "dp=4096,sp=2048" -q
+	sleep 1
+
+	tc -j -s filter show dev $host2_if ingress \
+		| jq -e ".[] | select(.options.handle == 101) \
+		| select(.options.actions[0].stats.packets == 1)" &> /dev/null
+	if [[ $? -eq 0 ]]; then
+		seen=1
+	fi
+
+	tc filter del dev $host2_if ingress protocol $tc_proto pref 1 handle 101 flower
+	tc qdisc del dev $host2_if ingress
+
+	return $seen
+}
+
+brmcast_check_sg_entries()
+{
+	local report=$1; shift
+	local slist=("$@")
+	local sarg=""
+
+	for src in "${slist[@]}"; do
+		sarg="${sarg} and .source_list[].address == \"$src\""
+	done
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and .source_list != null $sarg)" &>/dev/null
+	check_err $? "Wrong *,G entry source list after $report report"
+
+	for sgent in "${slist[@]}"; do
+		bridge -j -d -s mdb show dev br0 \
+			| jq -e ".[].mdb[] | \
+				 select(.grp == \"$TEST_GROUP\" and .src == \"$sgent\")" &>/dev/null
+		check_err $? "Missing S,G entry ($sgent, $TEST_GROUP)"
+	done
+}
+
+brmcast_check_sg_fwding()
+{
+	local should_fwd=$1; shift
+	local sources=("$@")
+
+	for src in "${sources[@]}"; do
+		local retval=0
+
+		mcast_packet_test $TEST_GROUP_MAC $src $TEST_GROUP $h2 $h1
+		retval=$?
+		if [ $should_fwd -eq 1 ]; then
+			check_fail $retval "Didn't forward traffic from S,G ($src, $TEST_GROUP)"
+		else
+			check_err $retval "Forwarded traffic for blocked S,G ($src, $TEST_GROUP)"
+		fi
+	done
+}
+
+brmcast_check_sg_state()
+{
+	local is_blocked=$1; shift
+	local sources=("$@")
+	local should_fail=1
+
+	if [ $is_blocked -eq 1 ]; then
+		should_fail=0
+	fi
+
+	for src in "${sources[@]}"; do
+		bridge -j -d -s mdb show dev br0 \
+			| jq -e ".[].mdb[] | \
+				 select(.grp == \"$TEST_GROUP\" and .source_list != null) |
+				 .source_list[] |
+				 select(.address == \"$src\") |
+				 select(.timer == \"0.00\")" &>/dev/null
+		check_err_fail $should_fail $? "Entry $src has zero timer"
+
+		bridge -j -d -s mdb show dev br0 \
+			| jq -e ".[].mdb[] | \
+				 select(.grp == \"$TEST_GROUP\" and .src == \"$src\" and \
+				 .flags[] == \"blocked\")" &>/dev/null
+		check_err_fail $should_fail $? "Entry $src has blocked flag"
+	done
+}
+
+mc_join()
+{
+	local if_name=$1
+	local group=$2
+	local vrf_name=$(master_name_get $if_name)
+
+	# We don't care about actual reception, just about joining the
+	# IP multicast group and adding the L2 address to the device's
+	# MAC filtering table
+	ip vrf exec $vrf_name \
+		mreceive -g $group -I $if_name > /dev/null 2>&1 &
+	mreceive_pid=$!
+
+	sleep 1
+}
+
+mc_leave()
+{
+	kill "$mreceive_pid" && wait "$mreceive_pid"
+}
+
+mc_send()
+{
+	local if_name=$1
+	local groups=$2
+	local vrf_name=$(master_name_get $if_name)
+
+	ip vrf exec $vrf_name \
+		msend -g $groups -I $if_name -c 1 > /dev/null 2>&1
+}
+
+start_ip_monitor()
+{
+	local mtype=$1; shift
+	local ip=${1-ip}; shift
+
+	# start the monitor in the background
+	tmpfile=`mktemp /var/run/nexthoptestXXX`
+	mpid=`($ip monitor $mtype > $tmpfile & echo $!) 2>/dev/null`
+	sleep 0.2
+	echo "$mpid $tmpfile"
+}
+
+stop_ip_monitor()
+{
+	local mpid=$1; shift
+	local tmpfile=$1; shift
+	local el=$1; shift
+	local what=$1; shift
+
+	sleep 0.2
+	kill $mpid
+	local lines=`grep '^\w' $tmpfile | wc -l`
+	test $lines -eq $el
+	check_err $? "$what: $lines lines of events, expected $el"
+	rm -rf $tmpfile
+}
+
+hw_stats_monitor_test()
+{
+	local dev=$1; shift
+	local type=$1; shift
+	local make_suitable=$1; shift
+	local make_unsuitable=$1; shift
+	local ip=${1-ip}; shift
+
+	RET=0
+
+	# Expect a notification about enablement.
+	local ipmout=$(start_ip_monitor stats "$ip")
+	$ip stats set dev $dev ${type}_stats on
+	stop_ip_monitor $ipmout 1 "${type}_stats enablement"
+
+	# Expect a notification about offload.
+	local ipmout=$(start_ip_monitor stats "$ip")
+	$make_suitable
+	stop_ip_monitor $ipmout 1 "${type}_stats installation"
+
+	# Expect a notification about loss of offload.
+	local ipmout=$(start_ip_monitor stats "$ip")
+	$make_unsuitable
+	stop_ip_monitor $ipmout 1 "${type}_stats deinstallation"
+
+	# Expect a notification about disablement
+	local ipmout=$(start_ip_monitor stats "$ip")
+	$ip stats set dev $dev ${type}_stats off
+	stop_ip_monitor $ipmout 1 "${type}_stats disablement"
+
+	log_test "${type}_stats notifications"
+}
+
+ipv4_to_bytes()
+{
+	local IP=$1; shift
+
+	printf '%02x:' ${IP//./ } |
+	    sed 's/:$//'
+}
+
+# Convert a given IPv6 address, `IP' such that the :: token, if present, is
+# expanded, and each 16-bit group is padded with zeroes to be 4 hexadecimal
+# digits. An optional `BYTESEP' parameter can be given to further separate
+# individual bytes of each 16-bit group.
+expand_ipv6()
+{
+	local IP=$1; shift
+	local bytesep=$1; shift
+
+	local cvt_ip=${IP/::/_}
+	local colons=${cvt_ip//[^:]/}
+	local allcol=:::::::
+	# IP where :: -> the appropriate number of colons:
+	local allcol_ip=${cvt_ip/_/${allcol:${#colons}}}
+
+	echo $allcol_ip | tr : '\n' |
+	    sed s/^/0000/ |
+	    sed 's/.*\(..\)\(..\)/\1'"$bytesep"'\2/' |
+	    tr '\n' : |
+	    sed 's/:$//'
+}
+
+ipv6_to_bytes()
+{
+	local IP=$1; shift
+
+	expand_ipv6 "$IP" :
+}
+
+u16_to_bytes()
+{
+	local u16=$1; shift
+
+	printf "%04x" $u16 | sed 's/^/000/;s/^.*\(..\)\(..\)$/\1:\2/'
+}
+
+# Given a mausezahn-formatted payload (colon-separated bytes given as %02x),
+# possibly with a keyword CHECKSUM stashed where a 16-bit checksum should be,
+# calculate checksum as per RFC 1071, assuming the CHECKSUM field (if any)
+# stands for 00:00.
+payload_template_calc_checksum()
+{
+	local payload=$1; shift
+
+	(
+	    # Set input radix.
+	    echo "16i"
+	    # Push zero for the initial checksum.
+	    echo 0
+
+	    # Pad the payload with a terminating 00: in case we get an odd
+	    # number of bytes.
+	    echo "${payload%:}:00:" |
+		sed 's/CHECKSUM/00:00/g' |
+		tr '[:lower:]' '[:upper:]' |
+		# Add the word to the checksum.
+		sed 's/\(..\):\(..\):/\1\2+\n/g' |
+		# Strip the extra odd byte we pushed if left unconverted.
+		sed 's/\(..\):$//'
+
+	    echo "10000 ~ +"	# Calculate and add carry.
+	    echo "FFFF r - p"	# Bit-flip and print.
+	) |
+	    dc |
+	    tr '[:upper:]' '[:lower:]'
+}
+
+payload_template_expand_checksum()
+{
+	local payload=$1; shift
+	local checksum=$1; shift
+
+	local ckbytes=$(u16_to_bytes $checksum)
+
+	echo "$payload" | sed "s/CHECKSUM/$ckbytes/g"
+}
+
+payload_template_nbytes()
+{
+	local payload=$1; shift
+
+	payload_template_expand_checksum "${payload%:}" 0 |
+		sed 's/:/\n/g' | wc -l
+}
+
+igmpv3_is_in_get()
+{
+	local GRP=$1; shift
+	local sources=("$@")
+
+	local igmpv3
+	local nsources=$(u16_to_bytes ${#sources[@]})
+
+	# IS_IN ( $sources )
+	igmpv3=$(:
+		)"22:"$(			: Type - Membership Report
+		)"00:"$(			: Reserved
+		)"CHECKSUM:"$(			: Checksum
+		)"00:00:"$(			: Reserved
+		)"00:01:"$(			: Number of Group Records
+		)"01:"$(			: Record Type - IS_IN
+		)"00:"$(			: Aux Data Len
+		)"${nsources}:"$(		: Number of Sources
+		)"$(ipv4_to_bytes $GRP):"$(	: Multicast Address
+		)"$(for src in "${sources[@]}"; do
+			ipv4_to_bytes $src
+			echo -n :
+		    done)"$(			: Source Addresses
+		)
+	local checksum=$(payload_template_calc_checksum "$igmpv3")
+
+	payload_template_expand_checksum "$igmpv3" $checksum
+}
+
+igmpv2_leave_get()
+{
+	local GRP=$1; shift
+
+	local payload=$(:
+		)"17:"$(			: Type - Leave Group
+		)"00:"$(			: Max Resp Time - not meaningful
+		)"CHECKSUM:"$(			: Checksum
+		)"$(ipv4_to_bytes $GRP)"$(	: Group Address
+		)
+	local checksum=$(payload_template_calc_checksum "$payload")
+
+	payload_template_expand_checksum "$payload" $checksum
+}
+
+mldv2_is_in_get()
+{
+	local SIP=$1; shift
+	local GRP=$1; shift
+	local sources=("$@")
+
+	local hbh
+	local icmpv6
+	local nsources=$(u16_to_bytes ${#sources[@]})
+
+	hbh=$(:
+		)"3a:"$(			: Next Header - ICMPv6
+		)"00:"$(			: Hdr Ext Len
+		)"00:00:00:00:00:00:"$(		: Options and Padding
+		)
+
+	icmpv6=$(:
+		)"8f:"$(			: Type - MLDv2 Report
+		)"00:"$(			: Code
+		)"CHECKSUM:"$(			: Checksum
+		)"00:00:"$(			: Reserved
+		)"00:01:"$(			: Number of Group Records
+		)"01:"$(			: Record Type - IS_IN
+		)"00:"$(			: Aux Data Len
+		)"${nsources}:"$(		: Number of Sources
+		)"$(ipv6_to_bytes $GRP):"$(	: Multicast address
+		)"$(for src in "${sources[@]}"; do
+			ipv6_to_bytes $src
+			echo -n :
+		    done)"$(			: Source Addresses
+		)
+
+	local len=$(u16_to_bytes $(payload_template_nbytes $icmpv6))
+	local sudohdr=$(:
+		)"$(ipv6_to_bytes $SIP):"$(	: SIP
+		)"$(ipv6_to_bytes $GRP):"$(	: DIP is multicast address
+	        )"${len}:"$(			: Upper-layer length
+	        )"00:3a:"$(			: Zero and next-header
+	        )
+	local checksum=$(payload_template_calc_checksum ${sudohdr}${icmpv6})
+
+	payload_template_expand_checksum "$hbh$icmpv6" $checksum
+}
+
+mldv1_done_get()
+{
+	local SIP=$1; shift
+	local GRP=$1; shift
+
+	local hbh
+	local icmpv6
+
+	hbh=$(:
+		)"3a:"$(			: Next Header - ICMPv6
+		)"00:"$(			: Hdr Ext Len
+		)"00:00:00:00:00:00:"$(		: Options and Padding
+		)
+
+	icmpv6=$(:
+		)"84:"$(			: Type - MLDv1 Done
+		)"00:"$(			: Code
+		)"CHECKSUM:"$(			: Checksum
+		)"00:00:"$(			: Max Resp Delay - not meaningful
+		)"00:00:"$(			: Reserved
+		)"$(ipv6_to_bytes $GRP):"$(	: Multicast address
+		)
+
+	local len=$(u16_to_bytes $(payload_template_nbytes $icmpv6))
+	local sudohdr=$(:
+		)"$(ipv6_to_bytes $SIP):"$(	: SIP
+		)"$(ipv6_to_bytes $GRP):"$(	: DIP is multicast address
+	        )"${len}:"$(			: Upper-layer length
+	        )"00:3a:"$(			: Zero and next-header
+	        )
+	local checksum=$(payload_template_calc_checksum ${sudohdr}${icmpv6})
+
+	payload_template_expand_checksum "$hbh$icmpv6" $checksum
+}
+
+bail_on_lldpad()
+{
+	local reason1="$1"; shift
+	local reason2="$1"; shift
+
+	if systemctl is-active --quiet lldpad; then
+
+		cat >/dev/stderr <<-EOF
+		WARNING: lldpad is running
+
+			lldpad will likely $reason1, and this test will
+			$reason2. Both are not supported at the same time,
+			one of them is arbitrarily going to overwrite the
+			other. That will cause spurious failures (or, unlikely,
+			passes) of this test.
+		EOF
+
+		if [[ -z $ALLOW_LLDPAD ]]; then
+			cat >/dev/stderr <<-EOF
+
+				If you want to run the test anyway, please set
+				an environment variable ALLOW_LLDPAD to a
+				non-empty string.
+			EOF
+			exit 1
+		else
+			return
+		fi
+	fi
+}
+
+absval()
+{
+	local v=$1; shift
+
+	echo $((v > 0 ? v : -v))
 }
diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh
new file mode 100755
index 000000000000..c5b0cbc85b3e
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/local_termination.sh
@@ -0,0 +1,299 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="standalone bridge"
+NUM_NETIFS=2
+PING_COUNT=1
+REQUIRE_MTOOLS=yes
+REQUIRE_MZ=no
+
+source lib.sh
+
+H1_IPV4="192.0.2.1"
+H2_IPV4="192.0.2.2"
+H1_IPV6="2001:db8:1::1"
+H2_IPV6="2001:db8:1::2"
+
+BRIDGE_ADDR="00:00:de:ad:be:ee"
+MACVLAN_ADDR="00:00:de:ad:be:ef"
+UNKNOWN_UC_ADDR1="de:ad:be:ef:ee:03"
+UNKNOWN_UC_ADDR2="de:ad:be:ef:ee:04"
+UNKNOWN_UC_ADDR3="de:ad:be:ef:ee:05"
+JOINED_IPV4_MC_ADDR="225.1.2.3"
+UNKNOWN_IPV4_MC_ADDR1="225.1.2.4"
+UNKNOWN_IPV4_MC_ADDR2="225.1.2.5"
+UNKNOWN_IPV4_MC_ADDR3="225.1.2.6"
+JOINED_IPV6_MC_ADDR="ff2e::0102:0304"
+UNKNOWN_IPV6_MC_ADDR1="ff2e::0102:0305"
+UNKNOWN_IPV6_MC_ADDR2="ff2e::0102:0306"
+UNKNOWN_IPV6_MC_ADDR3="ff2e::0102:0307"
+
+JOINED_MACV4_MC_ADDR="01:00:5e:01:02:03"
+UNKNOWN_MACV4_MC_ADDR1="01:00:5e:01:02:04"
+UNKNOWN_MACV4_MC_ADDR2="01:00:5e:01:02:05"
+UNKNOWN_MACV4_MC_ADDR3="01:00:5e:01:02:06"
+JOINED_MACV6_MC_ADDR="33:33:01:02:03:04"
+UNKNOWN_MACV6_MC_ADDR1="33:33:01:02:03:05"
+UNKNOWN_MACV6_MC_ADDR2="33:33:01:02:03:06"
+UNKNOWN_MACV6_MC_ADDR3="33:33:01:02:03:07"
+
+NON_IP_MC="01:02:03:04:05:06"
+NON_IP_PKT="00:04 48:45:4c:4f"
+BC="ff:ff:ff:ff:ff:ff"
+
+# Disable promisc to ensure we don't receive unknown MAC DA packets
+export TCPDUMP_EXTRA_FLAGS="-pl"
+
+h1=${NETIFS[p1]}
+h2=${NETIFS[p2]}
+
+send_non_ip()
+{
+	local if_name=$1
+	local smac=$2
+	local dmac=$3
+
+	$MZ -q $if_name "$dmac $smac $NON_IP_PKT"
+}
+
+send_uc_ipv4()
+{
+	local if_name=$1
+	local dmac=$2
+
+	ip neigh add $H2_IPV4 lladdr $dmac dev $if_name
+	ping_do $if_name $H2_IPV4
+	ip neigh del $H2_IPV4 dev $if_name
+}
+
+check_rcv()
+{
+	local if_name=$1
+	local type=$2
+	local pattern=$3
+	local should_receive=$4
+	local should_fail=
+
+	[ $should_receive = true ] && should_fail=0 || should_fail=1
+	RET=0
+
+	tcpdump_show $if_name | grep -q "$pattern"
+
+	check_err_fail "$should_fail" "$?" "reception"
+
+	log_test "$if_name: $type"
+}
+
+mc_route_prepare()
+{
+	local if_name=$1
+	local vrf_name=$(master_name_get $if_name)
+
+	ip route add 225.100.1.0/24 dev $if_name vrf $vrf_name
+	ip -6 route add ff2e::/64 dev $if_name vrf $vrf_name
+}
+
+mc_route_destroy()
+{
+	local if_name=$1
+	local vrf_name=$(master_name_get $if_name)
+
+	ip route del 225.100.1.0/24 dev $if_name vrf $vrf_name
+	ip -6 route del ff2e::/64 dev $if_name vrf $vrf_name
+}
+
+run_test()
+{
+	local rcv_if_name=$1
+	local smac=$(mac_get $h1)
+	local rcv_dmac=$(mac_get $rcv_if_name)
+
+	tcpdump_start $rcv_if_name
+
+	mc_route_prepare $h1
+	mc_route_prepare $rcv_if_name
+
+	send_uc_ipv4 $h1 $rcv_dmac
+	send_uc_ipv4 $h1 $MACVLAN_ADDR
+	send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR1
+
+	ip link set dev $rcv_if_name promisc on
+	send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR2
+	mc_send $h1 $UNKNOWN_IPV4_MC_ADDR2
+	mc_send $h1 $UNKNOWN_IPV6_MC_ADDR2
+	ip link set dev $rcv_if_name promisc off
+
+	mc_join $rcv_if_name $JOINED_IPV4_MC_ADDR
+	mc_send $h1 $JOINED_IPV4_MC_ADDR
+	mc_leave
+
+	mc_join $rcv_if_name $JOINED_IPV6_MC_ADDR
+	mc_send $h1 $JOINED_IPV6_MC_ADDR
+	mc_leave
+
+	mc_send $h1 $UNKNOWN_IPV4_MC_ADDR1
+	mc_send $h1 $UNKNOWN_IPV6_MC_ADDR1
+
+	ip link set dev $rcv_if_name allmulticast on
+	send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR3
+	mc_send $h1 $UNKNOWN_IPV4_MC_ADDR3
+	mc_send $h1 $UNKNOWN_IPV6_MC_ADDR3
+	ip link set dev $rcv_if_name allmulticast off
+
+	mc_route_destroy $rcv_if_name
+	mc_route_destroy $h1
+
+	sleep 1
+
+	tcpdump_stop $rcv_if_name
+
+	check_rcv $rcv_if_name "Unicast IPv4 to primary MAC address" \
+		"$smac > $rcv_dmac, ethertype IPv4 (0x0800)" \
+		true
+
+	check_rcv $rcv_if_name "Unicast IPv4 to macvlan MAC address" \
+		"$smac > $MACVLAN_ADDR, ethertype IPv4 (0x0800)" \
+		true
+
+	check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address" \
+		"$smac > $UNKNOWN_UC_ADDR1, ethertype IPv4 (0x0800)" \
+		false
+
+	check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address, promisc" \
+		"$smac > $UNKNOWN_UC_ADDR2, ethertype IPv4 (0x0800)" \
+		true
+
+	check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address, allmulti" \
+		"$smac > $UNKNOWN_UC_ADDR3, ethertype IPv4 (0x0800)" \
+		false
+
+	check_rcv $rcv_if_name "Multicast IPv4 to joined group" \
+		"$smac > $JOINED_MACV4_MC_ADDR, ethertype IPv4 (0x0800)" \
+		true
+
+	check_rcv $rcv_if_name "Multicast IPv4 to unknown group" \
+		"$smac > $UNKNOWN_MACV4_MC_ADDR1, ethertype IPv4 (0x0800)" \
+		false
+
+	check_rcv $rcv_if_name "Multicast IPv4 to unknown group, promisc" \
+		"$smac > $UNKNOWN_MACV4_MC_ADDR2, ethertype IPv4 (0x0800)" \
+		true
+
+	check_rcv $rcv_if_name "Multicast IPv4 to unknown group, allmulti" \
+		"$smac > $UNKNOWN_MACV4_MC_ADDR3, ethertype IPv4 (0x0800)" \
+		true
+
+	check_rcv $rcv_if_name "Multicast IPv6 to joined group" \
+		"$smac > $JOINED_MACV6_MC_ADDR, ethertype IPv6 (0x86dd)" \
+		true
+
+	check_rcv $rcv_if_name "Multicast IPv6 to unknown group" \
+		"$smac > $UNKNOWN_MACV6_MC_ADDR1, ethertype IPv6 (0x86dd)" \
+		false
+
+	check_rcv $rcv_if_name "Multicast IPv6 to unknown group, promisc" \
+		"$smac > $UNKNOWN_MACV6_MC_ADDR2, ethertype IPv6 (0x86dd)" \
+		true
+
+	check_rcv $rcv_if_name "Multicast IPv6 to unknown group, allmulti" \
+		"$smac > $UNKNOWN_MACV6_MC_ADDR3, ethertype IPv6 (0x86dd)" \
+		true
+
+	tcpdump_cleanup $rcv_if_name
+}
+
+h1_create()
+{
+	simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+bridge_create()
+{
+	ip link add br0 type bridge
+	ip link set br0 address $BRIDGE_ADDR
+	ip link set br0 up
+
+	ip link set $h2 master br0
+	ip link set $h2 up
+
+	simple_if_init br0 $H2_IPV4/24 $H2_IPV6/64
+}
+
+bridge_destroy()
+{
+	simple_if_fini br0 $H2_IPV4/24 $H2_IPV6/64
+
+	ip link del br0
+}
+
+standalone()
+{
+	h1_create
+	h2_create
+
+	ip link add link $h2 name macvlan0 type macvlan mode private
+	ip link set macvlan0 address $MACVLAN_ADDR
+	ip link set macvlan0 up
+
+	run_test $h2
+
+	ip link del macvlan0
+
+	h2_destroy
+	h1_destroy
+}
+
+bridge()
+{
+	h1_create
+	bridge_create
+
+	ip link add link br0 name macvlan0 type macvlan mode private
+	ip link set macvlan0 address $MACVLAN_ADDR
+	ip link set macvlan0 up
+
+	run_test br0
+
+	ip link del macvlan0
+
+	bridge_destroy
+	h1_destroy
+}
+
+cleanup()
+{
+	pre_cleanup
+	vrf_cleanup
+}
+
+setup_prepare()
+{
+	vrf_prepare
+	# setup_wait() needs this
+	ip link set $h1 up
+	ip link set $h2 up
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh
index 360ca133bead..6c257ec03756 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh
@@ -98,6 +98,7 @@ switch_create()
 	# Bridge between H1 and H2.
 
 	ip link add name br1 type bridge vlan_filtering 1
+	ip link set dev br1 addrgenmode none
 	ip link set dev br1 up
 
 	ip link set dev $swp1 master br1
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh
index c5095da7f6bf..04fd14b0a9b7 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh
@@ -65,7 +65,8 @@ setup_prepare()
 	vrf_prepare
 	mirror_gre_topo_create
 
-	ip link add name br2 type bridge vlan_filtering 0
+	ip link add name br2 address $(mac_get $swp3) \
+		type bridge vlan_filtering 0
 	ip link set dev br2 up
 
 	ip link set dev $swp3 master br2
@@ -93,12 +94,16 @@ cleanup()
 
 test_gretap()
 {
+	ip neigh replace 192.0.2.130 lladdr $(mac_get $h3) \
+		 nud permanent dev br2
 	full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap"
 	full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap"
 }
 
 test_ip6gretap()
 {
+	ip neigh replace 2001:db8:2::2 lladdr $(mac_get $h3) \
+		nud permanent dev br2
 	full_test_span_gre_dir gt6 ingress 8 0 "mirror to ip6gretap"
 	full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap"
 }
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh
index 197e769c2ed1..f35313c76fac 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh
@@ -35,7 +35,8 @@ setup_prepare()
 	vrf_prepare
 	mirror_gre_topo_create
 
-	ip link add name br2 type bridge vlan_filtering 0
+	ip link add name br2 address $(mac_get $swp3) \
+		type bridge vlan_filtering 0
 	ip link set dev br2 up
 
 	vlan_create $swp3 555
@@ -80,17 +81,26 @@ test_gretap()
 
 test_ip6gretap()
 {
-	test_vlan_match gt6 'skip_hw vlan_id 555 vlan_ethtype ip' \
+	test_vlan_match gt6 'skip_hw vlan_id 555 vlan_ethtype ipv6' \
 			"mirror to ip6gretap"
 }
 
 test_gretap_stp()
 {
+	# Sometimes after mirror installation, the neighbor's state is not valid.
+	# The reason is that there is no SW datapath activity related to the
+	# neighbor for the remote GRE address. Therefore whether the corresponding
+	# neighbor will be valid is a matter of luck, and the test is thus racy.
+	# Set the neighbor's state to permanent, so it would be always valid.
+	ip neigh replace 192.0.2.130 lladdr $(mac_get $h3) \
+		nud permanent dev br2
 	full_test_span_gre_stp gt4 $swp3.555 "mirror to gretap"
 }
 
 test_ip6gretap_stp()
 {
+	ip neigh replace 2001:db8:2::2 lladdr $(mac_get $h3) \
+		nud permanent dev br2
 	full_test_span_gre_stp gt6 $swp3.555 "mirror to ip6gretap"
 }
 
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh
index a3402cd8d5b6..0cf4c47a46f9 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh
@@ -61,9 +61,12 @@ setup_prepare()
 
 	vrf_prepare
 	mirror_gre_topo_create
+	# Avoid changing br1's PVID while it is operational as a L3 interface.
+	ip link set dev br1 down
 
 	ip link set dev $swp3 master br1
 	bridge vlan add dev br1 vid 555 pvid untagged self
+	ip link set dev br1 up
 	ip address add dev br1 192.0.2.129/28
 	ip address add dev br1 2001:db8:2::1/64
 
@@ -87,12 +90,16 @@ cleanup()
 
 test_gretap()
 {
+	ip neigh replace 192.0.2.130 lladdr $(mac_get $h3) \
+		 nud permanent dev br1
 	full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap"
 	full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap"
 }
 
 test_ip6gretap()
 {
+	ip neigh replace 2001:db8:2::2 lladdr $(mac_get $h3) \
+		nud permanent dev br1
 	full_test_span_gre_dir gt6 ingress 8 0 "mirror to ip6gretap"
 	full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap"
 }
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh
index 28d568c48a73..c53148b1dc63 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh
@@ -140,13 +140,15 @@ switch_create()
 	ip link set dev $swp3 up
 	ip link set dev $swp4 up
 
-	ip link add name br1 type bridge vlan_filtering 1
-	ip link set dev br1 up
-	__addr_add_del br1 add 192.0.2.129/32
-	ip -4 route add 192.0.2.130/32 dev br1
+	ip link add name br1 address $(mac_get $swp3) \
+		type bridge vlan_filtering 1
 
 	team_create lag loadbalance $swp3 $swp4
 	ip link set dev lag master br1
+
+	ip link set dev br1 up
+	__addr_add_del br1 add 192.0.2.129/32
+	ip -4 route add 192.0.2.130/32 dev br1
 }
 
 switch_destroy()
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh
index 472bd023e2a5..5ea9d63915f7 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh
@@ -72,9 +72,10 @@ test_span_gre_ttl()
 
 	RET=0
 
-	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+	mirror_install $swp1 ingress $tundev \
+		"prot ip flower $tcflags ip_prot icmp"
 	tc filter add dev $h3 ingress pref 77 prot $prot \
-		flower ip_ttl 50 action pass
+		flower skip_hw ip_ttl 50 action pass
 
 	mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 0
 
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
index fac486178ef7..0c36546e131e 100644
--- a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 
-source "$relative_path/mirror_lib.sh"
+source "$net_forwarding_dir/mirror_lib.sh"
 
 quick_test_span_gre_dir_ips()
 {
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh
index 39c03e2867f4..6e615fffa4ef 100644
--- a/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh
@@ -33,7 +33,7 @@
 #   |                                                                         |
 #   +-------------------------------------------------------------------------+
 
-source "$relative_path/mirror_topo_lib.sh"
+source "$net_forwarding_dir/mirror_topo_lib.sh"
 
 mirror_gre_topo_h3_create()
 {
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh
index c02291e9841e..c8a9b5bd841f 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh
@@ -141,7 +141,7 @@ test_gretap()
 
 test_ip6gretap()
 {
-	test_vlan_match gt6 'skip_hw vlan_id 555 vlan_ethtype ip' \
+	test_vlan_match gt6 'skip_hw vlan_id 555 vlan_ethtype ipv6' \
 			"mirror to ip6gretap"
 }
 
@@ -218,6 +218,7 @@ test_ip6gretap_forbidden_egress()
 test_span_gre_untagged_egress()
 {
 	local tundev=$1; shift
+	local ul_proto=$1; shift
 	local what=$1; shift
 
 	RET=0
@@ -225,7 +226,7 @@ test_span_gre_untagged_egress()
 	mirror_install $swp1 ingress $tundev "matchall $tcflags"
 
 	quick_test_span_gre_dir $tundev ingress
-	quick_test_span_vlan_dir $h3 555 ingress
+	quick_test_span_vlan_dir $h3 555 ingress "$ul_proto"
 
 	h3_addr_add_del del $h3.555
 	bridge vlan add dev $swp3 vid 555 pvid untagged
@@ -233,7 +234,7 @@ test_span_gre_untagged_egress()
 	sleep 5
 
 	quick_test_span_gre_dir $tundev ingress
-	fail_test_span_vlan_dir $h3 555 ingress
+	fail_test_span_vlan_dir $h3 555 ingress "$ul_proto"
 
 	h3_addr_add_del del $h3
 	bridge vlan add dev $swp3 vid 555
@@ -241,7 +242,7 @@ test_span_gre_untagged_egress()
 	sleep 5
 
 	quick_test_span_gre_dir $tundev ingress
-	quick_test_span_vlan_dir $h3 555 ingress
+	quick_test_span_vlan_dir $h3 555 ingress "$ul_proto"
 
 	mirror_uninstall $swp1 ingress
 
@@ -250,12 +251,12 @@ test_span_gre_untagged_egress()
 
 test_gretap_untagged_egress()
 {
-	test_span_gre_untagged_egress gt4 "mirror to gretap"
+	test_span_gre_untagged_egress gt4 ip "mirror to gretap"
 }
 
 test_ip6gretap_untagged_egress()
 {
-	test_span_gre_untagged_egress gt6 "mirror to ip6gretap"
+	test_span_gre_untagged_egress gt6 ipv6 "mirror to ip6gretap"
 }
 
 test_span_gre_fdb_roaming()
@@ -271,7 +272,7 @@ test_span_gre_fdb_roaming()
 
 	while ((RET == 0)); do
 		bridge fdb del dev $swp3 $h3mac vlan 555 master 2>/dev/null
-		bridge fdb add dev $swp2 $h3mac vlan 555 master
+		bridge fdb add dev $swp2 $h3mac vlan 555 master static
 		sleep 1
 		fail_test_span_gre_dir $tundev ingress
 
diff --git a/tools/testing/selftests/net/forwarding/mirror_lib.sh b/tools/testing/selftests/net/forwarding/mirror_lib.sh
index 13db1cb50e57..3e8ebeff3019 100644
--- a/tools/testing/selftests/net/forwarding/mirror_lib.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_lib.sh
@@ -20,6 +20,13 @@ mirror_uninstall()
 	tc filter del dev $swp1 $direction pref 1000
 }
 
+is_ipv6()
+{
+	local addr=$1; shift
+
+	[[ -z ${addr//[0-9a-fA-F:]/} ]]
+}
+
 mirror_test()
 {
 	local vrf_name=$1; shift
@@ -29,9 +36,17 @@ mirror_test()
 	local pref=$1; shift
 	local expect=$1; shift
 
+	if is_ipv6 $dip; then
+		local proto=-6
+		local type="icmp6 type=128" # Echo request.
+	else
+		local proto=
+		local type="icmp echoreq"
+	fi
+
 	local t0=$(tc_rule_stats_get $dev $pref)
-	$MZ $vrf_name ${sip:+-A $sip} -B $dip -a own -b bc -q \
-	    -c 10 -d 100msec -t icmp type=8
+	$MZ $proto $vrf_name ${sip:+-A $sip} -B $dip -a own -b bc -q \
+	    -c 10 -d 100msec -t $type
 	sleep 0.5
 	local t1=$(tc_rule_stats_get $dev $pref)
 	local delta=$((t1 - t0))
@@ -100,13 +115,14 @@ do_test_span_vlan_dir_ips()
 	local dev=$1; shift
 	local vid=$1; shift
 	local direction=$1; shift
+	local ul_proto=$1; shift
 	local ip1=$1; shift
 	local ip2=$1; shift
 
 	# Install the capture as skip_hw to avoid double-counting of packets.
 	# The traffic is meant for local box anyway, so will be trapped to
 	# kernel.
-	vlan_capture_install $dev "skip_hw vlan_id $vid vlan_ethtype ip"
+	vlan_capture_install $dev "skip_hw vlan_id $vid vlan_ethtype $ul_proto"
 	mirror_test v$h1 $ip1 $ip2 $dev 100 $expect
 	mirror_test v$h2 $ip2 $ip1 $dev 100 $expect
 	vlan_capture_uninstall $dev
diff --git a/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh b/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh
index 04979e5962e7..bb1adbb7b98a 100644
--- a/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh
@@ -60,6 +60,7 @@ mirror_topo_switch_create()
 	ip link set dev $swp3 up
 
 	ip link add name br1 type bridge vlan_filtering 1
+	ip link set dev br1 addrgenmode none
 	ip link set dev br1 up
 
 	ip link set dev $swp1 master br1
diff --git a/tools/testing/selftests/net/forwarding/mirror_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_vlan.sh
index 9ab2ce77b332..0b44e148235e 100755
--- a/tools/testing/selftests/net/forwarding/mirror_vlan.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_vlan.sh
@@ -85,9 +85,9 @@ test_tagged_vlan_dir()
 	RET=0
 
 	mirror_install $swp1 $direction $swp3.555 "matchall $tcflags"
-	do_test_span_vlan_dir_ips 10 "$h3.555" 111 "$direction" \
+	do_test_span_vlan_dir_ips 10 "$h3.555" 111 "$direction" ip \
 				  192.0.2.17 192.0.2.18
-	do_test_span_vlan_dir_ips  0 "$h3.555" 555 "$direction" \
+	do_test_span_vlan_dir_ips  0 "$h3.555" 555 "$direction" ip \
 				  192.0.2.17 192.0.2.18
 	mirror_uninstall $swp1 $direction
 
diff --git a/tools/testing/selftests/net/forwarding/no_forwarding.sh b/tools/testing/selftests/net/forwarding/no_forwarding.sh
new file mode 100755
index 000000000000..af3b398d13f0
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/no_forwarding.sh
@@ -0,0 +1,261 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="standalone two_bridges one_bridge_two_pvids"
+NUM_NETIFS=4
+
+source lib.sh
+
+h1=${NETIFS[p1]}
+h2=${NETIFS[p3]}
+swp1=${NETIFS[p2]}
+swp2=${NETIFS[p4]}
+
+H1_IPV4="192.0.2.1"
+H2_IPV4="192.0.2.2"
+H1_IPV6="2001:db8:1::1"
+H2_IPV6="2001:db8:1::2"
+
+IPV4_ALLNODES="224.0.0.1"
+IPV6_ALLNODES="ff02::1"
+MACV4_ALLNODES="01:00:5e:00:00:01"
+MACV6_ALLNODES="33:33:00:00:00:01"
+NON_IP_MC="01:02:03:04:05:06"
+NON_IP_PKT="00:04 48:45:4c:4f"
+BC="ff:ff:ff:ff:ff:ff"
+
+# The full 4K VLAN space is too much to check, so strategically pick some
+# values which should provide reasonable coverage
+vids=(0 1 2 5 10 20 50 100 200 500 1000 1000 2000 4000 4094)
+
+send_non_ip()
+{
+	local if_name=$1
+	local smac=$2
+	local dmac=$3
+
+	$MZ -q $if_name "$dmac $smac $NON_IP_PKT"
+}
+
+send_uc_ipv4()
+{
+	local if_name=$1
+	local dmac=$2
+
+	ip neigh add $H2_IPV4 lladdr $dmac dev $if_name
+	ping_do $if_name $H2_IPV4
+	ip neigh del $H2_IPV4 dev $if_name
+}
+
+send_mc_ipv4()
+{
+	local if_name=$1
+
+	ping_do $if_name $IPV4_ALLNODES "-I $if_name"
+}
+
+send_uc_ipv6()
+{
+	local if_name=$1
+	local dmac=$2
+
+	ip -6 neigh add $H2_IPV6 lladdr $dmac dev $if_name
+	ping6_do $if_name $H2_IPV6
+	ip -6 neigh del $H2_IPV6 dev $if_name
+}
+
+send_mc_ipv6()
+{
+	local if_name=$1
+
+	ping6_do $if_name $IPV6_ALLNODES%$if_name
+}
+
+check_rcv()
+{
+	local if_name=$1
+	local type=$2
+	local pattern=$3
+	local should_fail=1
+
+	RET=0
+
+	tcpdump_show $if_name | grep -q "$pattern"
+
+	check_err_fail "$should_fail" "$?" "reception"
+
+	log_test "$type"
+}
+
+run_test()
+{
+	local test_name="$1"
+	local smac=$(mac_get $h1)
+	local dmac=$(mac_get $h2)
+	local h1_ipv6_lladdr=$(ipv6_lladdr_get $h1)
+	local vid=
+
+	echo "$test_name: Sending packets"
+
+	tcpdump_start $h2
+
+	send_non_ip $h1 $smac $dmac
+	send_non_ip $h1 $smac $NON_IP_MC
+	send_non_ip $h1 $smac $BC
+	send_uc_ipv4 $h1 $dmac
+	send_mc_ipv4 $h1
+	send_uc_ipv6 $h1 $dmac
+	send_mc_ipv6 $h1
+
+	for vid in "${vids[@]}"; do
+		vlan_create $h1 $vid
+		simple_if_init $h1.$vid $H1_IPV4/24 $H1_IPV6/64
+
+		send_non_ip $h1.$vid $smac $dmac
+		send_non_ip $h1.$vid $smac $NON_IP_MC
+		send_non_ip $h1.$vid $smac $BC
+		send_uc_ipv4 $h1.$vid $dmac
+		send_mc_ipv4 $h1.$vid
+		send_uc_ipv6 $h1.$vid $dmac
+		send_mc_ipv6 $h1.$vid
+
+		simple_if_fini $h1.$vid $H1_IPV4/24 $H1_IPV6/64
+		vlan_destroy $h1 $vid
+	done
+
+	sleep 1
+
+	echo "$test_name: Checking which packets were received"
+
+	tcpdump_stop $h2
+
+	check_rcv $h2 "$test_name: Unicast non-IP untagged" \
+		"$smac > $dmac, 802.3, length 4:"
+
+	check_rcv $h2 "$test_name: Multicast non-IP untagged" \
+		"$smac > $NON_IP_MC, 802.3, length 4:"
+
+	check_rcv $h2 "$test_name: Broadcast non-IP untagged" \
+		"$smac > $BC, 802.3, length 4:"
+
+	check_rcv $h2 "$test_name: Unicast IPv4 untagged" \
+		"$smac > $dmac, ethertype IPv4 (0x0800)"
+
+	check_rcv $h2 "$test_name: Multicast IPv4 untagged" \
+		"$smac > $MACV4_ALLNODES, ethertype IPv4 (0x0800).*: $H1_IPV4 > $IPV4_ALLNODES"
+
+	check_rcv $h2 "$test_name: Unicast IPv6 untagged" \
+		"$smac > $dmac, ethertype IPv6 (0x86dd).*8: $H1_IPV6 > $H2_IPV6"
+
+	check_rcv $h2 "$test_name: Multicast IPv6 untagged" \
+		"$smac > $MACV6_ALLNODES, ethertype IPv6 (0x86dd).*: $h1_ipv6_lladdr > $IPV6_ALLNODES"
+
+	for vid in "${vids[@]}"; do
+		check_rcv $h2 "$test_name: Unicast non-IP VID $vid" \
+			"$smac > $dmac, ethertype 802.1Q (0x8100).*vlan $vid,.*length 4"
+
+		check_rcv $h2 "$test_name: Multicast non-IP VID $vid" \
+			"$smac > $NON_IP_MC, ethertype 802.1Q (0x8100).*vlan $vid,.*length 4"
+
+		check_rcv $h2 "$test_name: Broadcast non-IP VID $vid" \
+			"$smac > $BC, ethertype 802.1Q (0x8100).*vlan $vid,.*length 4"
+
+		check_rcv $h2 "$test_name: Unicast IPv4 VID $vid" \
+			"$smac > $dmac, ethertype 802.1Q (0x8100).*vlan $vid,.*ethertype IPv4 (0x0800), $H1_IPV4 > $H2_IPV4"
+
+		check_rcv $h2 "$test_name: Multicast IPv4 VID $vid" \
+			"$smac > $MACV4_ALLNODES, ethertype 802.1Q (0x8100).*vlan $vid,.*ethertype IPv4 (0x0800), $H1_IPV4 > $IPV4_ALLNODES"
+
+		check_rcv $h2 "$test_name: Unicast IPv6 VID $vid" \
+			"$smac > $dmac, ethertype 802.1Q (0x8100).*vlan $vid,.*ethertype IPv6 (0x86dd), $H1_IPV6 > $H2_IPV6"
+
+		check_rcv $h2 "$test_name: Multicast IPv6 VID $vid" \
+			"$smac > $MACV6_ALLNODES, ethertype 802.1Q (0x8100).*vlan $vid,.*ethertype IPv6 (0x86dd), $h1_ipv6_lladdr > $IPV6_ALLNODES"
+	done
+
+	tcpdump_cleanup $h2
+}
+
+standalone()
+{
+	run_test "Standalone switch ports"
+}
+
+two_bridges()
+{
+	ip link add br0 type bridge && ip link set br0 up
+	ip link add br1 type bridge && ip link set br1 up
+	ip link set $swp1 master br0
+	ip link set $swp2 master br1
+
+	run_test "Switch ports in different bridges"
+
+	ip link del br1
+	ip link del br0
+}
+
+one_bridge_two_pvids()
+{
+	ip link add br0 type bridge vlan_filtering 1 vlan_default_pvid 0
+	ip link set br0 up
+	ip link set $swp1 master br0
+	ip link set $swp2 master br0
+
+	bridge vlan add dev $swp1 vid 1 pvid untagged
+	bridge vlan add dev $swp1 vid 2 pvid untagged
+
+	run_test "Switch ports in VLAN-aware bridge with different PVIDs"
+
+	ip link del br0
+}
+
+h1_create()
+{
+	simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+setup_prepare()
+{
+	vrf_prepare
+
+	h1_create
+	h2_create
+	# we call simple_if_init from the test itself, but setup_wait expects
+	# that we call it from here, and waits until the interfaces are up
+	ip link set dev $swp1 up
+	ip link set dev $swp2 up
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/pedit_dsfield.sh b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh
index 55eeacf59241..af008fbf2725 100755
--- a/tools/testing/selftests/net/forwarding/pedit_dsfield.sh
+++ b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh
@@ -60,7 +60,9 @@ h2_destroy()
 
 switch_create()
 {
-	ip link add name br1 up type bridge vlan_filtering 1
+	ip link add name br1 type bridge vlan_filtering 1
+	ip link set dev br1 addrgenmode none
+	ip link set dev br1 up
 	ip link set dev $swp1 master br1
 	ip link set dev $swp1 up
 	ip link set dev $swp2 master br1
@@ -75,7 +77,9 @@ switch_destroy()
 	tc qdisc del dev $swp2 clsact
 	tc qdisc del dev $swp1 clsact
 
+	ip link set dev $swp2 down
 	ip link set dev $swp2 nomaster
+	ip link set dev $swp1 down
 	ip link set dev $swp1 nomaster
 	ip link del dev br1
 }
diff --git a/tools/testing/selftests/net/forwarding/pedit_ip.sh b/tools/testing/selftests/net/forwarding/pedit_ip.sh
new file mode 100755
index 000000000000..d14efb2d23b2
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/pedit_ip.sh
@@ -0,0 +1,201 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test sends traffic from H1 to H2. Either on ingress of $swp1, or on
+# egress of $swp2, the traffic is acted upon by a pedit action. An ingress
+# filter installed on $h2 verifies that the packet looks like expected.
+#
+# +----------------------+                             +----------------------+
+# | H1                   |                             |                   H2 |
+# |    + $h1             |                             |            $h2 +     |
+# |    | 192.0.2.1/28    |                             |   192.0.2.2/28 |     |
+# +----|-----------------+                             +----------------|-----+
+#      |                                                                |
+# +----|----------------------------------------------------------------|-----+
+# | SW |                                                                |     |
+# |  +-|----------------------------------------------------------------|-+   |
+# |  | + $swp1                       BR                           $swp2 + |   |
+# |  +--------------------------------------------------------------------+   |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	ping_ipv6
+	test_ip4_src
+	test_ip4_dst
+	test_ip6_src
+	test_ip6_dst
+"
+
+NUM_NETIFS=4
+source lib.sh
+source tc_common.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.2/28 2001:db8:1::2/64
+	tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+	tc qdisc del dev $h2 clsact
+	simple_if_fini $h2 192.0.2.2/28 2001:db8:1::2/64
+}
+
+switch_create()
+{
+	ip link add name br1 up type bridge vlan_filtering 1
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+	ip link set dev $swp2 master br1
+	ip link set dev $swp2 up
+
+	tc qdisc add dev $swp1 clsact
+	tc qdisc add dev $swp2 clsact
+}
+
+switch_destroy()
+{
+	tc qdisc del dev $swp2 clsact
+	tc qdisc del dev $swp1 clsact
+
+	ip link set dev $swp2 down
+	ip link set dev $swp2 nomaster
+	ip link set dev $swp1 down
+	ip link set dev $swp1 nomaster
+	ip link del dev br1
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	h2mac=$(mac_get $h2)
+
+	vrf_prepare
+	h1_create
+	h2_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+	h2_destroy
+	h1_destroy
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.2
+}
+
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:1::2
+}
+
+do_test_pedit_ip()
+{
+	local pedit_locus=$1; shift
+	local pedit_action=$1; shift
+	local match_prot=$1; shift
+	local match_flower=$1; shift
+	local mz_flags=$1; shift
+
+	tc filter add $pedit_locus handle 101 pref 1 \
+	   flower action pedit ex munge $pedit_action
+	tc filter add dev $h2 ingress handle 101 pref 1 prot $match_prot \
+	   flower skip_hw $match_flower action pass
+
+	RET=0
+
+	$MZ $mz_flags $h1 -c 10 -d 20msec -p 100 -a own -b $h2mac -q -t ip
+
+	local pkts
+	pkts=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= 10" \
+			tc_rule_handle_stats_get "dev $h2 ingress" 101)
+	check_err $? "Expected to get 10 packets, but got $pkts."
+
+	pkts=$(tc_rule_handle_stats_get "$pedit_locus" 101)
+	((pkts >= 10))
+	check_err $? "Expected to get 10 packets on pedit rule, but got $pkts."
+
+	log_test "$pedit_locus pedit $pedit_action"
+
+	tc filter del dev $h2 ingress pref 1
+	tc filter del $pedit_locus pref 1
+}
+
+do_test_pedit_ip6()
+{
+	local locus=$1; shift
+	local pedit_addr=$1; shift
+	local flower_addr=$1; shift
+
+	do_test_pedit_ip "$locus" "$pedit_addr set 2001:db8:2::1" ipv6	\
+			 "$flower_addr 2001:db8:2::1"			\
+			 "-6 -A 2001:db8:1::1 -B 2001:db8:1::2"
+}
+
+do_test_pedit_ip4()
+{
+	local locus=$1; shift
+	local pedit_addr=$1; shift
+	local flower_addr=$1; shift
+
+	do_test_pedit_ip "$locus" "$pedit_addr set 198.51.100.1" ip	\
+			 "$flower_addr 198.51.100.1"			\
+			 "-A 192.0.2.1 -B 192.0.2.2"
+}
+
+test_ip4_src()
+{
+	do_test_pedit_ip4 "dev $swp1 ingress" "ip src" src_ip
+	do_test_pedit_ip4 "dev $swp2 egress"  "ip src" src_ip
+}
+
+test_ip4_dst()
+{
+	do_test_pedit_ip4 "dev $swp1 ingress" "ip dst" dst_ip
+	do_test_pedit_ip4 "dev $swp2 egress"  "ip dst" dst_ip
+}
+
+test_ip6_src()
+{
+	do_test_pedit_ip6 "dev $swp1 ingress" "ip6 src" src_ip
+	do_test_pedit_ip6 "dev $swp2 egress"  "ip6 src" src_ip
+}
+
+test_ip6_dst()
+{
+	do_test_pedit_ip6 "dev $swp1 ingress" "ip6 dst" dst_ip
+	do_test_pedit_ip6 "dev $swp2 egress"  "ip6 dst" dst_ip
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/pedit_l4port.sh b/tools/testing/selftests/net/forwarding/pedit_l4port.sh
index 5f20d289ee43..10e594c55117 100755
--- a/tools/testing/selftests/net/forwarding/pedit_l4port.sh
+++ b/tools/testing/selftests/net/forwarding/pedit_l4port.sh
@@ -71,7 +71,9 @@ switch_destroy()
 	tc qdisc del dev $swp2 clsact
 	tc qdisc del dev $swp1 clsact
 
+	ip link set dev $swp2 down
 	ip link set dev $swp2 nomaster
+	ip link set dev $swp1 down
 	ip link set dev $swp1 nomaster
 	ip link del dev br1
 }
diff --git a/tools/testing/selftests/net/forwarding/q_in_vni.sh b/tools/testing/selftests/net/forwarding/q_in_vni.sh
new file mode 100755
index 000000000000..798b13525c02
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/q_in_vni.sh
@@ -0,0 +1,348 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------+                          +------------------------+
+# | H1 (vrf)              |                          | H2 (vrf)               |
+# |  + $h1.10             |                          |  + $h2.10              |
+# |  | 192.0.2.1/28       |                          |  | 192.0.2.2/28        |
+# |  |                    |                          |  |                     |
+# |  | + $h1.20           |                          |  | + $h2.20            |
+# |  \ | 198.51.100.1/24  |                          |  \ | 198.51.100.2/24   |
+# |   \|                  |                          |   \|                   |
+# |    + $h1              |                          |    + $h2               |
+# +----|------------------+                          +----|-------------------+
+#      |                                                  |
+# +----|--------------------------------------------------|-------------------+
+# | SW |                                                  |                   |
+# | +--|--------------------------------------------------|-----------------+ |
+# | |  + $swp1                   BR1 (802.1ad)            + $swp2           | |
+# | |    vid 100 pvid untagged                              vid 100 pvid    | |
+# | |                                                           untagged    | |
+# | |  + vx100 (vxlan)                                                      | |
+# | |    local 192.0.2.17                                                   | |
+# | |    remote 192.0.2.34 192.0.2.50                                       | |
+# | |    id 1000 dstport $VXPORT                                            | |
+# | |    vid 100 pvid untagged                                              | |
+# | +-----------------------------------------------------------------------+ |
+# |                                                                           |
+# |  192.0.2.32/28 via 192.0.2.18                                             |
+# |  192.0.2.48/28 via 192.0.2.18                                             |
+# |                                                                           |
+# |    + $rp1                                                                 |
+# |    | 192.0.2.17/28                                                        |
+# +----|----------------------------------------------------------------------+
+#      |
+# +----|--------------------------------------------------------+
+# |    |                                             VRP2 (vrf) |
+# |    + $rp2                                                   |
+# |      192.0.2.18/28                                          |
+# |                                                             |   (maybe) HW
+# =============================================================================
+# |                                                             |  (likely) SW
+# |    + v1 (veth)                             + v3 (veth)      |
+# |    | 192.0.2.33/28                         | 192.0.2.49/28  |
+# +----|---------------------------------------|----------------+
+#      |                                       |
+# +----|------------------------------+   +----|------------------------------+
+# |    + v2 (veth)        NS1 (netns) |   |    + v4 (veth)        NS2 (netns) |
+# |      192.0.2.34/28                |   |      192.0.2.50/28                |
+# |                                   |   |                                   |
+# |   192.0.2.16/28 via 192.0.2.33    |   |   192.0.2.16/28 via 192.0.2.49    |
+# |   192.0.2.50/32 via 192.0.2.33    |   |   192.0.2.34/32 via 192.0.2.49    |
+# |                                   |   |                                   |
+# | +-------------------------------+ |   | +-------------------------------+ |
+# | |                 BR2 (802.1ad) | |   | |                 BR2 (802.1ad) | |
+# | |  + vx100 (vxlan)              | |   | |  + vx100 (vxlan)              | |
+# | |    local 192.0.2.34           | |   | |    local 192.0.2.50           | |
+# | |    remote 192.0.2.17          | |   | |    remote 192.0.2.17          | |
+# | |    remote 192.0.2.50          | |   | |    remote 192.0.2.34          | |
+# | |    id 1000 dstport $VXPORT    | |   | |    id 1000 dstport $VXPORT    | |
+# | |    vid 100 pvid untagged      | |   | |    vid 100 pvid untagged      | |
+# | |                               | |   | |                               | |
+# | |  + w1 (veth)                  | |   | |  + w1 (veth)                  | |
+# | |  | vid 100 pvid untagged      | |   | |  | vid 100 pvid untagged      | |
+# | +--|----------------------------+ |   | +--|----------------------------+ |
+# |    |                              |   |    |                              |
+# | +--|----------------------------+ |   | +--|----------------------------+ |
+# | |  |                  VW2 (vrf) | |   | |  |                  VW2 (vrf) | |
+# | |  + w2 (veth)                  | |   | |  + w2 (veth)                  | |
+# | |  |\                           | |   | |  |\                           | |
+# | |  | + w2.10                    | |   | |  | + w2.10                    | |
+# | |  |   192.0.2.3/28             | |   | |  |   192.0.2.4/28             | |
+# | |  |                            | |   | |  |                            | |
+# | |  + w2.20                      | |   | |  + w2.20                      | |
+# | |    198.51.100.3/24            | |   | |    198.51.100.4/24            | |
+# | +-------------------------------+ |   | +-------------------------------+ |
+# +-----------------------------------+   +-----------------------------------+
+
+: ${VXPORT:=4789}
+export VXPORT
+
+: ${ALL_TESTS:="
+	ping_ipv4
+    "}
+
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1
+	tc qdisc add dev $h1 clsact
+	vlan_create $h1 10 v$h1 192.0.2.1/28
+	vlan_create $h1 20 v$h1 198.51.100.1/24
+}
+
+h1_destroy()
+{
+	vlan_destroy $h1 20
+	vlan_destroy $h1 10
+	tc qdisc del dev $h1 clsact
+	simple_if_fini $h1
+}
+
+h2_create()
+{
+	simple_if_init $h2
+	tc qdisc add dev $h2 clsact
+	vlan_create $h2 10 v$h2 192.0.2.2/28
+	vlan_create $h2 20 v$h2 198.51.100.2/24
+}
+
+h2_destroy()
+{
+	vlan_destroy $h2 20
+	vlan_destroy $h2 10
+	tc qdisc del dev $h2 clsact
+	simple_if_fini $h2
+}
+
+rp1_set_addr()
+{
+	ip address add dev $rp1 192.0.2.17/28
+
+	ip route add 192.0.2.32/28 nexthop via 192.0.2.18
+	ip route add 192.0.2.48/28 nexthop via 192.0.2.18
+}
+
+rp1_unset_addr()
+{
+	ip route del 192.0.2.48/28 nexthop via 192.0.2.18
+	ip route del 192.0.2.32/28 nexthop via 192.0.2.18
+
+	ip address del dev $rp1 192.0.2.17/28
+}
+
+switch_create()
+{
+	ip link add name br1 type bridge vlan_filtering 1 vlan_protocol 802.1ad \
+		vlan_default_pvid 0 mcast_snooping 0
+	ip link set dev br1 addrgenmode none
+	# Make sure the bridge uses the MAC address of the local port and not
+	# that of the VxLAN's device.
+	ip link set dev br1 address $(mac_get $swp1)
+	ip link set dev br1 up
+
+	ip link set dev $rp1 up
+	rp1_set_addr
+
+	ip link add name vx100 type vxlan id 1000		\
+		local 192.0.2.17 dstport "$VXPORT"	\
+		nolearning noudpcsum tos inherit ttl 100
+	ip link set dev vx100 up
+
+	ip link set dev vx100 master br1
+	bridge vlan add vid 100 dev vx100 pvid untagged
+
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+	bridge vlan add vid 100 dev $swp1 pvid untagged
+
+	ip link set dev $swp2 master br1
+	ip link set dev $swp2 up
+	bridge vlan add vid 100 dev $swp2 pvid untagged
+
+	bridge fdb append dev vx100 00:00:00:00:00:00 dst 192.0.2.34 self
+	bridge fdb append dev vx100 00:00:00:00:00:00 dst 192.0.2.50 self
+}
+
+switch_destroy()
+{
+	bridge fdb del dev vx100 00:00:00:00:00:00 dst 192.0.2.50 self
+	bridge fdb del dev vx100 00:00:00:00:00:00 dst 192.0.2.34 self
+
+	bridge vlan del vid 100 dev $swp2
+	ip link set dev $swp2 down
+	ip link set dev $swp2 nomaster
+
+	bridge vlan del vid 100 dev $swp1
+	ip link set dev $swp1 down
+	ip link set dev $swp1 nomaster
+
+	ip link set dev vx100 nomaster
+	ip link set dev vx100 down
+	ip link del dev vx100
+
+	rp1_unset_addr
+	ip link set dev $rp1 down
+
+	ip link set dev br1 down
+	ip link del dev br1
+}
+
+vrp2_create()
+{
+	simple_if_init $rp2 192.0.2.18/28
+	__simple_if_init v1 v$rp2 192.0.2.33/28
+	__simple_if_init v3 v$rp2 192.0.2.49/28
+	tc qdisc add dev v1 clsact
+}
+
+vrp2_destroy()
+{
+	tc qdisc del dev v1 clsact
+	__simple_if_fini v3 192.0.2.49/28
+	__simple_if_fini v1 192.0.2.33/28
+	simple_if_fini $rp2 192.0.2.18/28
+}
+
+ns_init_common()
+{
+	local in_if=$1; shift
+	local in_addr=$1; shift
+	local other_in_addr=$1; shift
+	local nh_addr=$1; shift
+	local host_addr1=$1; shift
+	local host_addr2=$1; shift
+
+	ip link set dev $in_if up
+	ip address add dev $in_if $in_addr/28
+	tc qdisc add dev $in_if clsact
+
+	ip link add name br2 type bridge vlan_filtering 1 vlan_protocol 802.1ad \
+		vlan_default_pvid 0
+	ip link set dev br2 up
+
+	ip link add name w1 type veth peer name w2
+
+	ip link set dev w1 master br2
+	ip link set dev w1 up
+	bridge vlan add vid 100 dev w1 pvid untagged
+
+	ip link add name vx100 type vxlan id 1000 local $in_addr \
+		dstport "$VXPORT"
+	ip link set dev vx100 up
+	bridge fdb append dev vx100 00:00:00:00:00:00 dst 192.0.2.17 self
+	bridge fdb append dev vx100 00:00:00:00:00:00 dst $other_in_addr self
+
+	ip link set dev vx100 master br2
+	tc qdisc add dev vx100 clsact
+
+	bridge vlan add vid 100 dev vx100 pvid untagged
+
+	simple_if_init w2
+        vlan_create w2 10 vw2 $host_addr1/28
+        vlan_create w2 20 vw2 $host_addr2/24
+
+	ip route add 192.0.2.16/28 nexthop via $nh_addr
+	ip route add $other_in_addr/32 nexthop via $nh_addr
+}
+export -f ns_init_common
+
+ns1_create()
+{
+	ip netns add ns1
+	ip link set dev v2 netns ns1
+	in_ns ns1 \
+	      ns_init_common v2 192.0.2.34 192.0.2.50 192.0.2.33 \
+			     192.0.2.3 198.51.100.3
+}
+
+ns1_destroy()
+{
+	ip netns exec ns1 ip link set dev v2 netns 1
+	ip netns del ns1
+}
+
+ns2_create()
+{
+	ip netns add ns2
+	ip link set dev v4 netns ns2
+	in_ns ns2 \
+	      ns_init_common v4 192.0.2.50 192.0.2.34 192.0.2.49 \
+			     192.0.2.4 198.51.100.4
+}
+
+ns2_destroy()
+{
+	ip netns exec ns2 ip link set dev v4 netns 1
+	ip netns del ns2
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	rp1=${NETIFS[p5]}
+	rp2=${NETIFS[p6]}
+
+	vrf_prepare
+	forwarding_enable
+
+	h1_create
+	h2_create
+	switch_create
+
+	ip link add name v1 type veth peer name v2
+	ip link add name v3 type veth peer name v4
+	vrp2_create
+	ns1_create
+	ns2_create
+
+	r1_mac=$(in_ns ns1 mac_get w2)
+	r2_mac=$(in_ns ns2 mac_get w2)
+	h2_mac=$(mac_get $h2)
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ns2_destroy
+	ns1_destroy
+	vrp2_destroy
+	ip link del dev v3
+	ip link del dev v1
+
+	switch_destroy
+	h2_destroy
+	h1_destroy
+
+	forwarding_restore
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.2 ": local->local"
+	ping_test $h1 192.0.2.3 ": local->remote 1"
+	ping_test $h1 192.0.2.4 ": local->remote 2"
+}
+
+test_all()
+{
+	echo "Running tests with UDP port $VXPORT"
+	tests_run
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+test_all
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/q_in_vni_ipv6.sh b/tools/testing/selftests/net/forwarding/q_in_vni_ipv6.sh
new file mode 100755
index 000000000000..0548b2b0d416
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/q_in_vni_ipv6.sh
@@ -0,0 +1,347 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------+                          +------------------------+
+# | H1 (vrf)              |                          | H2 (vrf)               |
+# |  + $h1.10             |                          |  + $h2.10              |
+# |  | 2001:db8:1::1/64   |                          |  | 2001:db8:1::2/64    |
+# |  |                    |                          |  |                     |
+# |  | + $h1.20           |                          |  | + $h2.20            |
+# |  \ | 2001:db8:2::1/64 |                          |  \ | 2001:db8:2::2/64  |
+# |   \|                  |                          |   \|                   |
+# |    + $h1              |                          |    + $h2               |
+# +----|------------------+                          +----|-------------------+
+#      |                                                  |
+# +----|--------------------------------------------------|-------------------+
+# | SW |                                                  |                   |
+# | +--|--------------------------------------------------|-----------------+ |
+# | |  + $swp1                   BR1 (802.1ad)            + $swp2           | |
+# | |    vid 100 pvid untagged                              vid 100 pvid    | |
+# | |                                                           untagged    | |
+# | |  + vx100 (vxlan)                                                      | |
+# | |    local 2001:db8:3::1                                                | |
+# | |    remote 2001:db8:4::1 2001:db8:5::1                                 | |
+# | |    id 1000 dstport $VXPORT                                            | |
+# | |    vid 100 pvid untagged                                              | |
+# | +-----------------------------------------------------------------------+ |
+# |                                                                           |
+# |  2001:db8:4::0/64 via 2001:db8:3::2                                       |
+# |  2001:db8:5::0/64 via 2001:db8:3::2                                       |
+# |                                                                           |
+# |    + $rp1                                                                 |
+# |    | 2001:db8:3::1/64                                                     |
+# +----|----------------------------------------------------------------------+
+#      |
+# +----|----------------------------------------------------------+
+# |    |                                             VRP2 (vrf)   |
+# |    + $rp2                                                     |
+# |      2001:db8:3::2/64                                         |
+# |                                                               |   (maybe) HW
+# =============================================================================
+# |                                                               | (likely) SW
+# |    + v1 (veth)                             + v3 (veth)        |
+# |    | 2001:db8:4::2/64                      | 2001:db8:5::2/64 |
+# +----|---------------------------------------|------------------+
+#      |                                       |
+# +----|--------------------------------+ +----|-------------------------------+
+# |    + v2 (veth)        NS1 (netns)   | |    + v4 (veth)        NS2 (netns)  |
+# |      2001:db8:4::1/64               | |      2001:db8:5::1/64              |
+# |                                     | |                                    |
+# | 2001:db8:3::0/64 via 2001:db8:4::2  | | 2001:db8:3::0/64 via 2001:db8:5::2 |
+# | 2001:db8:5::1/128 via 2001:db8:4::2 | | 2001:db8:4::1/128 via              |
+# |                                     | |           2001:db8:5::2            |
+# | +-------------------------------+   | | +-------------------------------+  |
+# | |                 BR2 (802.1ad) |   | | |                 BR2 (802.1ad) |  |
+# | |  + vx100 (vxlan)              |   | | |  + vx100 (vxlan)              |  |
+# | |    local 2001:db8:4::1        |   | | |    local 2001:db8:5::1        |  |
+# | |    remote 2001:db8:3::1       |   | | |    remote 2001:db8:3::1       |  |
+# | |    remote 2001:db8:5::1       |   | | |    remote 2001:db8:4::1       |  |
+# | |    id 1000 dstport $VXPORT    |   | | |    id 1000 dstport $VXPORT    |  |
+# | |    vid 100 pvid untagged      |   | | |    vid 100 pvid untagged      |  |
+# | |                               |   | | |                               |  |
+# | |  + w1 (veth)                  |   | | |  + w1 (veth)                  |  |
+# | |  | vid 100 pvid untagged      |   | | |  | vid 100 pvid untagged      |  |
+# | +--|----------------------------+   | | +--|----------------------------+  |
+# |    |                                | |    |                               |
+# | +--|----------------------------+   | | +--|----------------------------+  |
+# | |  |                  VW2 (vrf) |   | | |  |                  VW2 (vrf) |  |
+# | |  + w2 (veth)                  |   | | |  + w2 (veth)                  |  |
+# | |  |\                           |   | | |  |\                           |  |
+# | |  | + w2.10                    |   | | |  | + w2.10                    |  |
+# | |  |   2001:db8:1::3/64         |   | | |  |   2001:db8:1::4/64         |  |
+# | |  |                            |   | | |  |                            |  |
+# | |  + w2.20                      |   | | |  + w2.20                      |  |
+# | |    2001:db8:2::3/64           |   | | |    2001:db8:2::4/64           |  |
+# | +-------------------------------+   | | +-------------------------------+  |
+# +-------------------------------------+ +------------------------------------+
+
+: ${VXPORT:=4789}
+export VXPORT
+
+: ${ALL_TESTS:="
+	ping_ipv6
+    "}
+
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1
+	tc qdisc add dev $h1 clsact
+	vlan_create $h1 10 v$h1 2001:db8:1::1/64
+	vlan_create $h1 20 v$h1 2001:db8:2::1/64
+}
+
+h1_destroy()
+{
+	vlan_destroy $h1 20
+	vlan_destroy $h1 10
+	tc qdisc del dev $h1 clsact
+	simple_if_fini $h1
+}
+
+h2_create()
+{
+	simple_if_init $h2
+	tc qdisc add dev $h2 clsact
+	vlan_create $h2 10 v$h2 2001:db8:1::2/64
+	vlan_create $h2 20 v$h2 2001:db8:2::2/64
+}
+
+h2_destroy()
+{
+	vlan_destroy $h2 20
+	vlan_destroy $h2 10
+	tc qdisc del dev $h2 clsact
+	simple_if_fini $h2
+}
+
+rp1_set_addr()
+{
+	ip address add dev $rp1 2001:db8:3::1/64
+
+	ip route add 2001:db8:4::0/64 nexthop via 2001:db8:3::2
+	ip route add 2001:db8:5::0/64 nexthop via 2001:db8:3::2
+}
+
+rp1_unset_addr()
+{
+	ip route del 2001:db8:5::0/64 nexthop via 2001:db8:3::2
+	ip route del 2001:db8:4::0/64 nexthop via 2001:db8:3::2
+
+	ip address del dev $rp1 2001:db8:3::1/64
+}
+
+switch_create()
+{
+	ip link add name br1 type bridge vlan_filtering 1 vlan_protocol 802.1ad \
+		vlan_default_pvid 0 mcast_snooping 0
+	# Make sure the bridge uses the MAC address of the local port and not
+	# that of the VxLAN's device.
+	ip link set dev br1 address $(mac_get $swp1)
+	ip link set dev br1 up
+
+	ip link set dev $rp1 up
+	rp1_set_addr
+
+	ip link add name vx100 type vxlan id 1000	\
+		local 2001:db8:3::1 dstport "$VXPORT"	\
+		nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100
+	ip link set dev vx100 up
+
+	ip link set dev vx100 master br1
+	bridge vlan add vid 100 dev vx100 pvid untagged
+
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+	bridge vlan add vid 100 dev $swp1 pvid untagged
+
+	ip link set dev $swp2 master br1
+	ip link set dev $swp2 up
+	bridge vlan add vid 100 dev $swp2 pvid untagged
+
+	bridge fdb append dev vx100 00:00:00:00:00:00 dst 2001:db8:4::1 self
+	bridge fdb append dev vx100 00:00:00:00:00:00 dst 2001:db8:5::1 self
+}
+
+switch_destroy()
+{
+	bridge fdb del dev vx100 00:00:00:00:00:00 dst 2001:db8:5::1 self
+	bridge fdb del dev vx100 00:00:00:00:00:00 dst 2001:db8:4::1 self
+
+	bridge vlan del vid 100 dev $swp2
+	ip link set dev $swp2 down
+	ip link set dev $swp2 nomaster
+
+	bridge vlan del vid 100 dev $swp1
+	ip link set dev $swp1 down
+	ip link set dev $swp1 nomaster
+
+	ip link set dev vx100 nomaster
+	ip link set dev vx100 down
+	ip link del dev vx100
+
+	rp1_unset_addr
+	ip link set dev $rp1 down
+
+	ip link set dev br1 down
+	ip link del dev br1
+}
+
+vrp2_create()
+{
+	simple_if_init $rp2 2001:db8:3::2/64
+	__simple_if_init v1 v$rp2 2001:db8:4::2/64
+	__simple_if_init v3 v$rp2 2001:db8:5::2/64
+	tc qdisc add dev v1 clsact
+}
+
+vrp2_destroy()
+{
+	tc qdisc del dev v1 clsact
+	__simple_if_fini v3 2001:db8:5::2/64
+	__simple_if_fini v1 2001:db8:4::2/64
+	simple_if_fini $rp2 2001:db8:3::2/64
+}
+
+ns_init_common()
+{
+	local in_if=$1; shift
+	local in_addr=$1; shift
+	local other_in_addr=$1; shift
+	local nh_addr=$1; shift
+	local host_addr1=$1; shift
+	local host_addr2=$1; shift
+
+	ip link set dev $in_if up
+	ip address add dev $in_if $in_addr/64
+	tc qdisc add dev $in_if clsact
+
+	ip link add name br2 type bridge vlan_filtering 1 vlan_protocol 802.1ad \
+		vlan_default_pvid 0
+	ip link set dev br2 up
+
+	ip link add name w1 type veth peer name w2
+
+	ip link set dev w1 master br2
+	ip link set dev w1 up
+	bridge vlan add vid 100 dev w1 pvid untagged
+
+	ip link add name vx100 type vxlan id 1000 local $in_addr \
+		dstport "$VXPORT" udp6zerocsumrx
+	ip link set dev vx100 up
+	bridge fdb append dev vx100 00:00:00:00:00:00 dst 2001:db8:3::1 self
+	bridge fdb append dev vx100 00:00:00:00:00:00 dst $other_in_addr self
+
+	ip link set dev vx100 master br2
+	tc qdisc add dev vx100 clsact
+
+	bridge vlan add vid 100 dev vx100 pvid untagged
+
+	simple_if_init w2
+        vlan_create w2 10 vw2 $host_addr1/64
+        vlan_create w2 20 vw2 $host_addr2/64
+
+	ip route add 2001:db8:3::0/64 nexthop via $nh_addr
+	ip route add $other_in_addr/128 nexthop via $nh_addr
+}
+export -f ns_init_common
+
+ns1_create()
+{
+	ip netns add ns1
+	ip link set dev v2 netns ns1
+	in_ns ns1 \
+	      ns_init_common v2 2001:db8:4::1 2001:db8:5::1 2001:db8:4::2 \
+			    2001:db8:1::3 2001:db8:2::3
+}
+
+ns1_destroy()
+{
+	ip netns exec ns1 ip link set dev v2 netns 1
+	ip netns del ns1
+}
+
+ns2_create()
+{
+	ip netns add ns2
+	ip link set dev v4 netns ns2
+	in_ns ns2 \
+	      ns_init_common v4 2001:db8:5::1 2001:db8:4::1 2001:db8:5::2 \
+			     2001:db8:1::4 2001:db8:2::4
+}
+
+ns2_destroy()
+{
+	ip netns exec ns2 ip link set dev v4 netns 1
+	ip netns del ns2
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	rp1=${NETIFS[p5]}
+	rp2=${NETIFS[p6]}
+
+	vrf_prepare
+	forwarding_enable
+
+	h1_create
+	h2_create
+	switch_create
+
+	ip link add name v1 type veth peer name v2
+	ip link add name v3 type veth peer name v4
+	vrp2_create
+	ns1_create
+	ns2_create
+
+	r1_mac=$(in_ns ns1 mac_get w2)
+	r2_mac=$(in_ns ns2 mac_get w2)
+	h2_mac=$(mac_get $h2)
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ns2_destroy
+	ns1_destroy
+	vrp2_destroy
+	ip link del dev v3
+	ip link del dev v1
+
+	switch_destroy
+	h2_destroy
+	h1_destroy
+
+	forwarding_restore
+	vrf_cleanup
+}
+
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:1::2 ": local->local"
+	ping6_test $h1 2001:db8:1::3 ": local->remote 1"
+	ping6_test $h1 2001:db8:1::4 ": local->remote 2"
+}
+
+test_all()
+{
+	echo "Running tests with UDP port $VXPORT"
+	tests_run
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+test_all
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router.sh b/tools/testing/selftests/net/forwarding/router.sh
index 057f91b05098..b98ea9449b8b 100755
--- a/tools/testing/selftests/net/forwarding/router.sh
+++ b/tools/testing/selftests/net/forwarding/router.sh
@@ -1,6 +1,24 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
+# +--------------------+                     +----------------------+
+# | H1                 |                     |                   H2 |
+# |                    |                     |                      |
+# |              $h1 + |                     | + $h2                |
+# |     192.0.2.2/24 | |                     | | 198.51.100.2/24    |
+# | 2001:db8:1::2/64 | |                     | | 2001:db8:2::2/64   |
+# |                  | |                     | |                    |
+# +------------------|-+                     +-|--------------------+
+#                    |                         |
+# +------------------|-------------------------|--------------------+
+# | SW               |                         |                    |
+# |                  |                         |                    |
+# |             $rp1 +                         + $rp2               |
+# |     192.0.2.1/24                             198.51.100.1/24    |
+# | 2001:db8:1::1/64                             2001:db8:2::1/64   |
+# |                                                                 |
+# +-----------------------------------------------------------------+
+
 ALL_TESTS="
 	ping_ipv4
 	ping_ipv6
diff --git a/tools/testing/selftests/net/forwarding/router_bridge.sh b/tools/testing/selftests/net/forwarding/router_bridge.sh
index ebc596a272f7..0182eb2abfa6 100755
--- a/tools/testing/selftests/net/forwarding/router_bridge.sh
+++ b/tools/testing/selftests/net/forwarding/router_bridge.sh
@@ -1,9 +1,39 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
+# +------------------------+                           +----------------------+
+# | H1 (vrf)               |                           |             H2 (vrf) |
+# |    + $h1               |                           |  + $h2               |
+# |    | 192.0.2.1/28      |                           |  | 192.0.2.130/28    |
+# |    | 2001:db8:1::1/64  |                           |  | 2001:db8:2::2/64  |
+# |    |                   |                           |  |                   |
+# +----|-------------------+                           +--|-------------------+
+#      |                                                  |
+# +----|--------------------------------------------------|-------------------+
+# | SW |                                                  |                   |
+# | +--|-----------------------------+                    + $swp2             |
+# | |  + $swp1      BR1 (802.1q)     |                      192.0.2.129/28    |
+# | |               192.0.2.2/28     |                      2001:db8:2::1/64  |
+# | |               2001:db8:1::1/64 |                                        |
+# | |                                |                                        |
+# | +--------------------------------+                                        |
+# +---------------------------------------------------------------------------+
+
 ALL_TESTS="
 	ping_ipv4
 	ping_ipv6
+	config_remaster
+	ping_ipv4
+	ping_ipv6
+	config_remove_pvid
+	ping_ipv4_fails
+	ping_ipv6_fails
+	config_add_pvid
+	ping_ipv4
+	ping_ipv6
+	config_late_pvid
+	ping_ipv4
+	ping_ipv6
 "
 NUM_NETIFS=4
 source lib.sh
@@ -38,7 +68,8 @@ h2_destroy()
 
 router_create()
 {
-	ip link add name br1 type bridge vlan_filtering 1
+	ip link add name br1 address $(mac_get $swp1) \
+		type bridge vlan_filtering 1
 	ip link set dev br1 up
 
 	ip link set dev $swp1 master br1
@@ -61,6 +92,42 @@ router_destroy()
 	ip link del dev br1
 }
 
+config_remaster()
+{
+	log_info "Remaster bridge slave"
+
+	ip link set dev $swp1 nomaster
+	sleep 2
+	ip link set dev $swp1 master br1
+}
+
+config_remove_pvid()
+{
+	log_info "Remove PVID from the bridge"
+
+	bridge vlan add dev br1 vid 1 self
+	sleep 2
+}
+
+config_add_pvid()
+{
+	log_info "Add PVID to the bridge"
+
+	bridge vlan add dev br1 vid 1 self pvid untagged
+	sleep 2
+}
+
+config_late_pvid()
+{
+	log_info "Add bridge PVID after enslaving port"
+
+	ip link set dev $swp1 nomaster
+	ip link set dev br1 type bridge vlan_default_pvid 0
+	sleep 2
+	ip link set dev $swp1 master br1
+	ip link set dev br1 type bridge vlan_default_pvid 1
+}
+
 setup_prepare()
 {
 	h1=${NETIFS[p1]}
@@ -103,6 +170,16 @@ ping_ipv6()
 	ping6_test $h1 2001:db8:2::2
 }
 
+ping_ipv4_fails()
+{
+	ping_test_fails $h1 192.0.2.130
+}
+
+ping_ipv6_fails()
+{
+	ping6_test_fails $h1 2001:db8:2::2
+}
+
 trap cleanup EXIT
 
 setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/router_bridge_1d.sh b/tools/testing/selftests/net/forwarding/router_bridge_1d.sh
new file mode 100755
index 000000000000..6d51f2ca72a2
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_bridge_1d.sh
@@ -0,0 +1,185 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +---------------------------------------------+      +----------------------+
+# | H1 (vrf)                                    |      |             H2 (vrf) |
+# |    + $h1.100            + $h1.200           |      |  + $h2               |
+# |    | 192.0.2.1/28       | 192.0.2.17/28     |      |  | 192.0.2.130/28    |
+# |    | 2001:db8:1::1/64   | 2001:db8:3::1/64  |      |  | 192.0.2.146/28    |
+# |    \_________ __________/                   |      |  | 2001:db8:2::2/64  |
+# |              V                              |      |  | 2001:db8:4::2/64  |
+# |              + $h1                          |      |  |                   |
+# +--------------|------------------------------+      +--|-------------------+
+#                |                                        |
+# +--------------|----------------------------------------|-------------------+
+# | SW           + $swp1                                  + $swp2             |
+# |              |                                          192.0.2.129/28    |
+# |              |                                          192.0.2.145/28    |
+# |              |                                          2001:db8:2::1/64  |
+# |      ________^___________________________               2001:db8:4::1/64  |
+# |     /                                    \                                |
+# | +---|------------------------------+ +---|------------------------------+ |
+# | |   + $swp1.100   BR1 (802.1d)     | |   + $swp1.200   BR2 (802.1d)     | |
+# | |                 192.0.2.2/28     | |                 192.0.2.18/28    | |
+# | |                 2001:db8:1::2/64 | |                 2001:db8:3::2/64 | |
+# | |                                  | |                                  | |
+# | +----------------------------------+ +----------------------------------+ |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	ping_ipv6
+	config_remaster
+	ping_ipv4
+	ping_ipv6
+"
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1
+	vlan_create $h1 100 v$h1 192.0.2.1/28 2001:db8:1::1/64
+	vlan_create $h1 200 v$h1 192.0.2.17/28 2001:db8:3::1/64
+	ip -4 route add 192.0.2.128/28 vrf v$h1 nexthop via 192.0.2.2
+	ip -4 route add 192.0.2.144/28 vrf v$h1 nexthop via 192.0.2.18
+	ip -6 route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2
+	ip -6 route add 2001:db8:4::/64 vrf v$h1 nexthop via 2001:db8:3::2
+}
+
+h1_destroy()
+{
+	ip -6 route del 2001:db8:4::/64 vrf v$h1
+	ip -6 route del 2001:db8:2::/64 vrf v$h1
+	ip -4 route del 192.0.2.144/28 vrf v$h1
+	ip -4 route del 192.0.2.128/28 vrf v$h1
+	vlan_destroy $h1 200
+	vlan_destroy $h1 100
+	simple_if_fini $h1
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.130/28 2001:db8:2::2/64 \
+			   192.0.2.146/28 2001:db8:4::2/64
+	ip -4 route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.129
+	ip -4 route add 192.0.2.16/28 vrf v$h2 nexthop via 192.0.2.145
+	ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::1
+	ip -6 route add 2001:db8:3::/64 vrf v$h2 nexthop via 2001:db8:4::1
+}
+
+h2_destroy()
+{
+	ip -6 route del 2001:db8:3::/64 vrf v$h2
+	ip -6 route del 2001:db8:1::/64 vrf v$h2
+	ip -4 route del 192.0.2.16/28 vrf v$h2
+	ip -4 route del 192.0.2.0/28 vrf v$h2
+	simple_if_fini $h2 192.0.2.130/28 2001:db8:2::2/64 \
+			   192.0.2.146/28 2001:db8:4::2/64
+}
+
+router_create()
+{
+	ip link set dev $swp1 up
+
+	vlan_create $swp1 100
+	ip link add name br1 type bridge vlan_filtering 0
+	ip link set dev br1 address $(mac_get $swp1.100)
+	ip link set dev $swp1.100 master br1
+	__addr_add_del br1 add 192.0.2.2/28 2001:db8:1::2/64
+	ip link set dev br1 up
+
+	vlan_create $swp1 200
+	ip link add name br2 type bridge vlan_filtering 0
+	ip link set dev br2 address $(mac_get $swp1.200)
+	ip link set dev $swp1.200 master br2
+	__addr_add_del br2 add 192.0.2.18/28 2001:db8:3::2/64
+	ip link set dev br2 up
+
+	ip link set dev $swp2 up
+	__addr_add_del $swp2 add 192.0.2.129/28 2001:db8:2::1/64 \
+				 192.0.2.145/28 2001:db8:4::1/64
+}
+
+router_destroy()
+{
+	__addr_add_del $swp2 del 192.0.2.129/28 2001:db8:2::1/64 \
+				 192.0.2.145/28 2001:db8:4::1/64
+	ip link set dev $swp2 down
+
+	__addr_add_del br2 del 192.0.2.18/28 2001:db8:3::2/64
+	ip link set dev $swp1.200 nomaster
+	ip link del dev br2
+	vlan_destroy $swp1 200
+
+	__addr_add_del br1 del 192.0.2.2/28 2001:db8:1::2/64
+	ip link set dev $swp1.100 nomaster
+	ip link del dev br1
+	vlan_destroy $swp1 100
+
+	ip link set dev $swp1 down
+}
+
+config_remaster()
+{
+	log_info "Remaster bridge slaves"
+
+	ip link set dev $swp1.100 nomaster
+	ip link set dev $swp1.200 nomaster
+	sleep 2
+	ip link set dev $swp1.200 master br2
+	ip link set dev $swp1.100 master br1
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	router_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	router_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.130 ": via 100"
+	ping_test $h1 192.0.2.146 ": via 200"
+}
+
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:2::2 ": via 100"
+	ping6_test $h1 2001:db8:4::2 ": via 200"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh b/tools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh
new file mode 100755
index 000000000000..e064b946e821
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh
@@ -0,0 +1,408 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +--------------------------------------------+
+# | H1 (vrf)                                   |
+# |                                            |
+# |    + LAG1.100          + LAG1.200          |
+# |    | 192.0.2.1/28      | 192.0.2.17/28     |
+# |    | 2001:db8:1::1/64  | 2001:db8:3:1/64   |
+# |    \___________ _______/                   |
+# |                v                           |
+# |                + LAG1 (team)               |
+# |                |                           |
+# |            ____^____                       |
+# |           /         \                      |
+# |          + $h1       + $h4                 |
+# |          |           |                     |
+# +----------|-----------|---------------------+
+#            |           |
+# +----------|-----------|---------------------+
+# | SW       |           |                     |
+# |          + $swp1     + $swp4               |
+# |           \____ ____/                      |
+# |                v                           |
+# |    LAG2 (team) +                           |
+# |                |                           |
+# |         _______^______________             |
+# |        /                      \            |
+# | +------|------------+ +-------|----------+ |
+# | |      + LAG2.100   | |       + LAG2.200 | |
+# | |                   | |                  | |
+# | |  BR1 (802.1d)     | | BR2 (802.1d)     | |
+# | |  192.0.2.2/28     | | 192.0.2.18/28    | |
+# | |  2001:db8:1::2/64 | | 2001:db8:3:2/64  | |
+# | |                   | |                  | |
+# | +-------------------+ +------------------+ |
+# |                                            |
+# |  + LAG3.100             + LAG3.200         |
+# |  | 192.0.2.129/28       | 192.0.2.145/28   |
+# |  | 2001:db8:2::1/64     | 2001:db8:4::1/64 |
+# |  |                      |                  |
+# |  \_________ ___________/                   |
+# |            v                               |
+# |            + LAG3 (team)                   |
+# |        ____|____                           |
+# |       /         \                          |
+# |       + $swp2   + $swp3                    |
+# |       |         |                          |
+# +-------|---------|--------------------------+
+#         |         |
+# +-------|---------|--------------------------+
+# |       |         |                          |
+# |       + $h2     + $h3                      |
+# |       \____ ___/                           |
+# |            |                               |
+# |            + LAG4 (team)                   |
+# |            |                               |
+# |  __________^__________                     |
+# | /                     \                    |
+# | |                     |                    |
+# | + LAG4.100            + LAG4.200           |
+# |   192.0.2.130/28        192.0.2.146/28     |
+# |   2001:db8:2::2/64      2001:db8:4::2/64   |
+# |                                            |
+# | H2 (vrf)                                   |
+# +--------------------------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	ping_ipv6
+
+	$(: exercise remastering of LAG2 slaves )
+	config_deslave_swp4
+	config_wait
+	ping_ipv4
+	ping_ipv6
+	config_enslave_swp4
+	config_deslave_swp1
+	config_wait
+	ping_ipv4
+	ping_ipv6
+	config_deslave_swp4
+	config_enslave_swp1
+	config_enslave_swp4
+	config_wait
+	ping_ipv4
+	ping_ipv6
+
+	$(: exercise remastering of LAG2 itself )
+	config_remaster_lag2
+	config_wait
+	ping_ipv4
+	ping_ipv6
+
+	$(: exercise remastering of LAG3 slaves )
+	config_deslave_swp2
+	config_wait
+	ping_ipv4
+	ping_ipv6
+	config_enslave_swp2
+	config_deslave_swp3
+	config_wait
+	ping_ipv4
+	ping_ipv6
+	config_deslave_swp2
+	config_enslave_swp3
+	config_enslave_swp2
+	config_wait
+	ping_ipv4
+	ping_ipv6
+"
+NUM_NETIFS=8
+source lib.sh
+
+h1_create()
+{
+	team_create lag1 lacp
+	ip link set dev lag1 addrgenmode none
+	ip link set dev lag1 address $(mac_get $h1)
+	ip link set dev $h1 master lag1
+	ip link set dev $h4 master lag1
+	simple_if_init lag1
+	ip link set dev $h1 up
+	ip link set dev $h4 up
+
+	vlan_create lag1 100 vlag1 192.0.2.1/28 2001:db8:1::1/64
+	vlan_create lag1 200 vlag1 192.0.2.17/28 2001:db8:3::1/64
+
+	ip -4 route add 192.0.2.128/28 vrf vlag1 nexthop via 192.0.2.2
+	ip -6 route add 2001:db8:2::/64 vrf vlag1 nexthop via 2001:db8:1::2
+
+	ip -4 route add 192.0.2.144/28 vrf vlag1 nexthop via 192.0.2.18
+	ip -6 route add 2001:db8:4::/64 vrf vlag1 nexthop via 2001:db8:3::2
+}
+
+h1_destroy()
+{
+	ip -6 route del 2001:db8:4::/64 vrf vlag1
+	ip -4 route del 192.0.2.144/28 vrf vlag1
+
+	ip -6 route del 2001:db8:2::/64 vrf vlag1
+	ip -4 route del 192.0.2.128/28 vrf vlag1
+
+	vlan_destroy lag1 200
+	vlan_destroy lag1 100
+
+	ip link set dev $h4 down
+	ip link set dev $h1 down
+	simple_if_fini lag1
+	ip link set dev $h4 nomaster
+	ip link set dev $h1 nomaster
+	team_destroy lag1
+}
+
+h2_create()
+{
+	team_create lag4 lacp
+	ip link set dev lag4 addrgenmode none
+	ip link set dev lag4 address $(mac_get $h2)
+	ip link set dev $h2 master lag4
+	ip link set dev $h3 master lag4
+	simple_if_init lag4
+	ip link set dev $h2 up
+	ip link set dev $h3 up
+
+	vlan_create lag4 100 vlag4 192.0.2.130/28 2001:db8:2::2/64
+	vlan_create lag4 200 vlag4 192.0.2.146/28 2001:db8:4::2/64
+
+	ip -4 route add 192.0.2.0/28 vrf vlag4 nexthop via 192.0.2.129
+	ip -6 route add 2001:db8:1::/64 vrf vlag4 nexthop via 2001:db8:2::1
+
+	ip -4 route add 192.0.2.16/28 vrf vlag4 nexthop via 192.0.2.145
+	ip -6 route add 2001:db8:3::/64 vrf vlag4 nexthop via 2001:db8:4::1
+}
+
+h2_destroy()
+{
+	ip -6 route del 2001:db8:3::/64 vrf vlag4
+	ip -4 route del 192.0.2.16/28 vrf vlag4
+
+	ip -6 route del 2001:db8:1::/64 vrf vlag4
+	ip -4 route del 192.0.2.0/28 vrf vlag4
+
+	vlan_destroy lag4 200
+	vlan_destroy lag4 100
+
+	ip link set dev $h3 down
+	ip link set dev $h2 down
+	simple_if_fini lag4
+	ip link set dev $h3 nomaster
+	ip link set dev $h2 nomaster
+	team_destroy lag4
+}
+
+router_create()
+{
+	team_create lag2 lacp
+	ip link set dev lag2 addrgenmode none
+	ip link set dev lag2 address $(mac_get $swp1)
+	ip link set dev $swp1 master lag2
+	ip link set dev $swp4 master lag2
+
+	vlan_create lag2 100
+	vlan_create lag2 200
+
+	ip link add name br1 type bridge vlan_filtering 0
+	ip link set dev br1 address $(mac_get lag2.100)
+	ip link set dev lag2.100 master br1
+
+	ip link add name br2 type bridge vlan_filtering 0
+	ip link set dev br2 address $(mac_get lag2.200)
+	ip link set dev lag2.200 master br2
+
+	ip link set dev $swp1 up
+	ip link set dev $swp4 up
+	ip link set dev br1 up
+	ip link set dev br2 up
+
+	__addr_add_del br1 add 192.0.2.2/28 2001:db8:1::2/64
+	__addr_add_del br2 add 192.0.2.18/28 2001:db8:3::2/64
+
+	team_create lag3 lacp
+	ip link set dev lag3 addrgenmode none
+	ip link set dev lag3 address $(mac_get $swp2)
+	ip link set dev $swp2 master lag3
+	ip link set dev $swp3 master lag3
+	ip link set dev $swp2 up
+	ip link set dev $swp3 up
+
+	vlan_create lag3 100
+	vlan_create lag3 200
+
+	__addr_add_del lag3.100 add 192.0.2.129/28 2001:db8:2::1/64
+	__addr_add_del lag3.200 add 192.0.2.145/28 2001:db8:4::1/64
+}
+
+router_destroy()
+{
+	__addr_add_del lag3.200 del 192.0.2.145/28 2001:db8:4::1/64
+	__addr_add_del lag3.100 del 192.0.2.129/28 2001:db8:2::1/64
+
+	vlan_destroy lag3 200
+	vlan_destroy lag3 100
+
+	ip link set dev $swp3 down
+	ip link set dev $swp2 down
+	ip link set dev $swp3 nomaster
+	ip link set dev $swp2 nomaster
+	team_destroy lag3
+
+	__addr_add_del br2 del 192.0.2.18/28 2001:db8:3::2/64
+	__addr_add_del br1 del 192.0.2.2/28 2001:db8:1::2/64
+
+	ip link set dev br2 down
+	ip link set dev br1 down
+	ip link set dev $swp4 down
+	ip link set dev $swp1 down
+
+	ip link set dev lag2.200 nomaster
+	ip link del dev br2
+
+	ip link set dev lag2.100 nomaster
+	ip link del dev br1
+
+	vlan_destroy lag2 200
+	vlan_destroy lag2 100
+
+	ip link set dev $swp4 nomaster
+	ip link set dev $swp1 nomaster
+	team_destroy lag2
+}
+
+config_remaster_lag2()
+{
+	log_info "Remaster bridge slaves"
+
+	ip link set dev lag2.200 nomaster
+	ip link set dev lag2.100 nomaster
+	sleep 2
+	ip link set dev lag2.100 master br1
+	ip link set dev lag2.200 master br2
+}
+
+config_deslave()
+{
+	local netdev=$1; shift
+
+	log_info "Deslave $netdev"
+	ip link set dev $netdev down
+	ip link set dev $netdev nomaster
+	ip link set dev $netdev up
+}
+
+config_deslave_swp1()
+{
+	config_deslave $swp1
+}
+
+config_deslave_swp2()
+{
+	config_deslave $swp2
+}
+
+config_deslave_swp3()
+{
+	config_deslave $swp3
+}
+
+config_deslave_swp4()
+{
+	config_deslave $swp4
+}
+
+config_enslave()
+{
+	local netdev=$1; shift
+	local master=$1; shift
+
+	log_info "Enslave $netdev to $master"
+	ip link set dev $netdev down
+	ip link set dev $netdev master $master
+	ip link set dev $netdev up
+}
+
+config_enslave_swp1()
+{
+	config_enslave $swp1 lag2
+}
+
+config_enslave_swp2()
+{
+	config_enslave $swp2 lag3
+}
+
+config_enslave_swp3()
+{
+	config_enslave $swp3 lag3
+}
+
+config_enslave_swp4()
+{
+	config_enslave $swp4 lag2
+}
+
+config_wait()
+{
+	setup_wait_dev lag2
+	setup_wait_dev lag3
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	h4=${NETIFS[p7]}
+	swp4=${NETIFS[p8]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	router_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	router_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test lag1.100 192.0.2.130 ": via 100"
+	ping_test lag1.200 192.0.2.146 ": via 200"
+}
+
+ping_ipv6()
+{
+	ping6_test lag1.100 2001:db8:2::2 ": via 100"
+	ping6_test lag1.200 2001:db8:4::2 ": via 200"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_bridge_lag.sh b/tools/testing/selftests/net/forwarding/router_bridge_lag.sh
new file mode 100755
index 000000000000..f05ffe213c46
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_bridge_lag.sh
@@ -0,0 +1,323 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +----------------------------+                   +--------------------------+
+# | H1 (vrf)                   |                   |                 H2 (vrf) |
+# |                            |                   |                          |
+# |        + LAG1 (team)       |                   |     + LAG4 (team)        |
+# |        | 192.0.2.1/28      |                   |     | 192.0.2.130/28     |
+# |        | 2001:db8:1::1/64  |                   |     | 2001:db8:2::2/64   |
+# |      __^___                |                   |   __^_____               |
+# |     /      \               |                   |  /        \              |
+# |    + $h1    + $h4          |                   | + $h2      + $h3         |
+# |    |        |              |                   | |          |             |
+# +----|--------|--------------+                   +-|----------|-------------+
+#      |        |                                    |          |
+# +----|--------|------------------------------------|----------|-------------+
+# | SW |        |                                    |          |             |
+# |    + $swp1  + $swp4                              + $swp2    + $swp3       |
+# |     \__ ___/                                      \__ _____/              |
+# |        v                                             v                    |
+# | +------|-------------------------------+             |                    |
+# | |      + LAG2       BR1 (802.1q)       |             + LAG3 (team)        |
+# | |        (team)       192.0.2.2/28     |               192.0.2.129/28     |
+# | |                     2001:db8:1::2/64 |               2001:db8:2::1/64   |
+# | |                                      |                                  |
+# | +--------------------------------------+                                  |
+# +---------------------------------------------------------------------------+
+
+: ${ALL_TESTS:="
+	ping_ipv4
+	ping_ipv6
+
+	$(: exercise remastering of LAG2 slaves )
+	config_deslave_swp4
+	config_wait
+	ping_ipv4
+	ping_ipv6
+	config_enslave_swp4
+	config_deslave_swp1
+	config_wait
+	ping_ipv4
+	ping_ipv6
+	config_deslave_swp4
+	config_enslave_swp1
+	config_enslave_swp4
+	config_wait
+	ping_ipv4
+	ping_ipv6
+
+	$(: exercise remastering of LAG2 itself )
+	config_remaster_lag2
+	config_wait
+	ping_ipv4
+	ping_ipv6
+
+	$(: exercise remastering of LAG3 slaves )
+	config_deslave_swp2
+	config_wait
+	ping_ipv4
+	ping_ipv6
+	config_enslave_swp2
+	config_deslave_swp3
+	config_wait
+	ping_ipv4
+	ping_ipv6
+	config_deslave_swp2
+	config_enslave_swp3
+	config_enslave_swp2
+	config_wait
+	ping_ipv4
+	ping_ipv6
+
+	$(: move LAG3 to a bridge and then out )
+	config_remaster_lag3
+	config_wait
+	ping_ipv4
+	ping_ipv6
+    "}
+NUM_NETIFS=8
+: ${lib_dir:=.}
+source $lib_dir/lib.sh
+$EXTRA_SOURCE
+
+h1_create()
+{
+	team_create lag1 lacp
+	ip link set dev lag1 address $(mac_get $h1)
+	ip link set dev $h1 master lag1
+	ip link set dev $h4 master lag1
+	simple_if_init lag1 192.0.2.1/28 2001:db8:1::1/64
+	ip link set dev $h1 up
+	ip link set dev $h4 up
+	ip -4 route add 192.0.2.128/28 vrf vlag1 nexthop via 192.0.2.2
+	ip -6 route add 2001:db8:2::/64 vrf vlag1 nexthop via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+	ip -6 route del 2001:db8:2::/64 vrf vlag1
+	ip -4 route del 192.0.2.128/28 vrf vlag1
+	ip link set dev $h4 down
+	ip link set dev $h1 down
+	simple_if_fini lag1 192.0.2.1/28 2001:db8:1::1/64
+	ip link set dev $h4 nomaster
+	ip link set dev $h1 nomaster
+	team_destroy lag1
+}
+
+h2_create()
+{
+	team_create lag4 lacp
+	ip link set dev lag4 address $(mac_get $h2)
+	ip link set dev $h2 master lag4
+	ip link set dev $h3 master lag4
+	simple_if_init lag4 192.0.2.130/28 2001:db8:2::2/64
+	ip link set dev $h2 up
+	ip link set dev $h3 up
+	ip -4 route add 192.0.2.0/28 vrf vlag4 nexthop via 192.0.2.129
+	ip -6 route add 2001:db8:1::/64 vrf vlag4 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+	ip -6 route del 2001:db8:1::/64 vrf vlag4
+	ip -4 route del 192.0.2.0/28 vrf vlag4
+	ip link set dev $h3 down
+	ip link set dev $h2 down
+	simple_if_fini lag4 192.0.2.130/28 2001:db8:2::2/64
+	ip link set dev $h3 nomaster
+	ip link set dev $h2 nomaster
+	team_destroy lag4
+}
+
+router_create()
+{
+	team_create lag2 lacp
+	ip link set dev lag2 address $(mac_get $swp1)
+	ip link set dev $swp1 master lag2
+	ip link set dev $swp4 master lag2
+
+	ip link add name br1 address $(mac_get lag2) \
+		type bridge vlan_filtering 1
+	ip link set dev lag2 master br1
+
+	ip link set dev $swp1 up
+	ip link set dev $swp4 up
+	ip link set dev br1 up
+
+	__addr_add_del br1 add 192.0.2.2/28 2001:db8:1::2/64
+
+	team_create lag3 lacp
+	ip link set dev lag3 address $(mac_get $swp2)
+	ip link set dev $swp2 master lag3
+	ip link set dev $swp3 master lag3
+	ip link set dev $swp2 up
+	ip link set dev $swp3 up
+	__addr_add_del lag3 add 192.0.2.129/28 2001:db8:2::1/64
+}
+
+router_destroy()
+{
+	__addr_add_del lag3 del 192.0.2.129/28 2001:db8:2::1/64
+	ip link set dev $swp3 down
+	ip link set dev $swp2 down
+	ip link set dev $swp3 nomaster
+	ip link set dev $swp2 nomaster
+	team_destroy lag3
+
+	__addr_add_del br1 del 192.0.2.2/28 2001:db8:1::2/64
+
+	ip link set dev $swp4 down
+	ip link set dev $swp1 down
+	ip link set dev br1 down
+
+	ip link set dev lag2 nomaster
+	ip link del dev br1
+
+	ip link set dev $swp4 nomaster
+	ip link set dev $swp1 nomaster
+	team_destroy lag2
+}
+
+config_remaster_lag2()
+{
+	log_info "Remaster bridge slave"
+
+	ip link set dev lag2 nomaster
+	sleep 2
+	ip link set dev lag2 master br1
+}
+
+config_remaster_lag3()
+{
+	log_info "Move lag3 to the bridge, then out again"
+
+	ip link set dev lag3 master br1
+	sleep 2
+	ip link set dev lag3 nomaster
+}
+
+config_deslave()
+{
+	local netdev=$1; shift
+
+	log_info "Deslave $netdev"
+	ip link set dev $netdev down
+	ip link set dev $netdev nomaster
+	ip link set dev $netdev up
+}
+
+config_deslave_swp1()
+{
+	config_deslave $swp1
+}
+
+config_deslave_swp2()
+{
+	config_deslave $swp2
+}
+
+config_deslave_swp3()
+{
+	config_deslave $swp3
+}
+
+config_deslave_swp4()
+{
+	config_deslave $swp4
+}
+
+config_enslave()
+{
+	local netdev=$1; shift
+	local master=$1; shift
+
+	log_info "Enslave $netdev to $master"
+	ip link set dev $netdev down
+	ip link set dev $netdev master $master
+	ip link set dev $netdev up
+}
+
+config_enslave_swp1()
+{
+	config_enslave $swp1 lag2
+}
+
+config_enslave_swp2()
+{
+	config_enslave $swp2 lag3
+}
+
+config_enslave_swp3()
+{
+	config_enslave $swp3 lag3
+}
+
+config_enslave_swp4()
+{
+	config_enslave $swp4 lag2
+}
+
+config_wait()
+{
+	setup_wait_dev lag2
+	setup_wait_dev lag3
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	h4=${NETIFS[p7]}
+	swp4=${NETIFS[p8]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	router_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	router_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test lag1 192.0.2.130
+}
+
+ping_ipv6()
+{
+	ping6_test lag1 2001:db8:2::2
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_bridge_pvid_vlan_upper.sh b/tools/testing/selftests/net/forwarding/router_bridge_pvid_vlan_upper.sh
new file mode 100755
index 000000000000..76e4941fef73
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_bridge_pvid_vlan_upper.sh
@@ -0,0 +1,155 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +----------------------------+
+# |                   H1 (vrf) |
+# |   + $h1.10                 |                      +----------------------+
+# |   | 192.0.2.1/28           |                      |             H2 (vrf) |
+# |   | 2001:db8:1::1/64       |                      |  + $h2               |
+# |   |                        |                      |  | 192.0.2.130/28    |
+# |   + $h1                    |                      |  | 2001:db8:2::2/64  |
+# +---|------------------------+                      +--|-------------------+
+#     |                                                  |
+# +---|--------------------------------------------------|-------------------+
+# |   |                            router (main VRF)     |                   |
+# | +-|----------------------------------+               + $swp2             |
+# | | + $swp1      BR1 (802.1q, pvid=10) |                 192.0.2.129/28    |
+# | |              192.0.2.2/28          |                 2001:db8:2::1/64  |
+# | |              2001:db8:1::2/64      |                                   |
+# | +------------------------------------+                                   |
+# +--------------------------------------------------------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	ping_ipv6
+	shuffle_pvid
+	ping_ipv4
+	ping_ipv6
+"
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1
+	vlan_create $h1 10 v$h1 192.0.2.1/28 2001:db8:1::1/64
+	ip -4 route add 192.0.2.128/28 vrf v$h1 nexthop via 192.0.2.2
+	ip -6 route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+	ip -6 route del 2001:db8:2::/64 vrf v$h1
+	ip -4 route del 192.0.2.128/28 vrf v$h1
+	vlan_destroy $h1 10
+	simple_if_fini $h1
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.130/28 2001:db8:2::2/64
+	ip -4 route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.129
+	ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+	ip -6 route del 2001:db8:1::/64 vrf v$h2
+	ip -4 route del 192.0.2.0/28 vrf v$h2
+	simple_if_fini $h2 192.0.2.130/28 2001:db8:2::2/64
+}
+
+router_create()
+{
+	ip link add name br1 address $(mac_get $swp1) \
+		type bridge vlan_filtering 1 vlan_default_pvid 0
+	ip link set dev br1 up
+	__addr_add_del br1 add 192.0.2.2/28 2001:db8:1::2/64
+
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+
+	ip link set dev $swp2 up
+	__addr_add_del $swp2 add 192.0.2.129/28 2001:db8:2::1/64
+
+	bridge vlan add dev br1 vid 10 pvid untagged self
+	bridge vlan add dev $swp1 vid 10
+}
+
+router_destroy()
+{
+	bridge vlan del dev $swp1 vid 10
+	bridge vlan del dev br1 vid 10 self
+
+	__addr_add_del $swp2 del 192.0.2.129/28 2001:db8:2::1/64
+	ip link set dev $swp2 down
+
+	ip link set dev $swp1 down
+	ip link set dev $swp1 nomaster
+
+	__addr_add_del br1 del 192.0.2.2/28 2001:db8:1::2/64
+	ip link del dev br1
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	router_create
+
+	forwarding_enable
+}
+
+shuffle_pvid()
+{
+	log_info "Add and remove VLAN upper for PVID VLAN"
+
+	# Adding and removing a VLAN upper for the PVID VLAN shouldn't change
+	# anything. The address is arbitrary, just to make sure it will be an L3
+	# netdevice.
+	vlan_create br1 10 "" 192.0.2.33/28
+	sleep 1
+	vlan_destroy br1 10
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	router_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.130
+}
+
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:2::2
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh b/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh
index fa6a88c50750..b76a4a707a5b 100755
--- a/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh
+++ b/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh
@@ -1,10 +1,43 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
+# +------------------------------------------------+   +----------------------+
+# | H1 (vrf)                                       |   |             H2 (vrf) |
+# |    + $h1.555           + $h1.777               |   |  + $h2               |
+# |    | 192.0.2.1/28      | 192.0.2.17/28         |   |  | 192.0.2.130/28    |
+# |    | 2001:db8:1::1/64  | 2001:db8:3::1/64      |   |  | 192.0.2.146/28    |
+# |    | .-----------------'                       |   |  | 2001:db8:2::2/64  |
+# |    |/                                          |   |  | 2001:db8:4::2/64  |
+# |    + $h1                                       |   |  |                   |
+# +----|-------------------------------------------+   +--|-------------------+
+#      |                                                  |
+# +----|--------------------------------------------------|-------------------+
+# | SW |                                                  |                   |
+# | +--|-------------------------------+                  + $swp2             |
+# | |  + $swp1                         |                    192.0.2.129/28    |
+# | |    vid 555 777                   |                    192.0.2.145/28    |
+# | |                                  |                    2001:db8:2::1/64  |
+# | |  + BR1 (802.1q)                  |                    2001:db8:4::1/64  |
+# | |    vid 555 pvid untagged         |                                      |
+# | |    192.0.2.2/28                  |                                      |
+# | |    192.0.2.18/28                 |                                      |
+# | |    2001:db8:1::2/64              |                                      |
+# | |    2001:db8:3::2/64              |                                      |
+# | +----------------------------------+                                      |
+# +---------------------------------------------------------------------------+
+
 ALL_TESTS="
 	ping_ipv4
 	ping_ipv6
 	vlan
+	config_777
+	ping_ipv4_fails
+	ping_ipv6_fails
+	ping_ipv4_777
+	ping_ipv6_777
+	config_555
+	ping_ipv4
+	ping_ipv6
 "
 NUM_NETIFS=4
 source lib.sh
@@ -12,36 +45,52 @@ source lib.sh
 h1_create()
 {
 	simple_if_init $h1
+
 	vlan_create $h1 555 v$h1 192.0.2.1/28 2001:db8:1::1/64
 	ip -4 route add 192.0.2.128/28 vrf v$h1 nexthop via 192.0.2.2
 	ip -6 route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2
+
+	vlan_create $h1 777 v$h1 192.0.2.17/28 2001:db8:3::1/64
+	ip -4 route add 192.0.2.144/28 vrf v$h1 nexthop via 192.0.2.18
+	ip -6 route add 2001:db8:4::/64 vrf v$h1 nexthop via 2001:db8:3::2
 }
 
 h1_destroy()
 {
+	ip -6 route del 2001:db8:4::/64 vrf v$h1
+	ip -4 route del 192.0.2.144/28 vrf v$h1
+	vlan_destroy $h1 777
+
 	ip -6 route del 2001:db8:2::/64 vrf v$h1
 	ip -4 route del 192.0.2.128/28 vrf v$h1
 	vlan_destroy $h1 555
+
 	simple_if_fini $h1
 }
 
 h2_create()
 {
-	simple_if_init $h2 192.0.2.130/28 2001:db8:2::2/64
+	simple_if_init $h2 192.0.2.130/28 2001:db8:2::2/64 \
+			   192.0.2.146/28 2001:db8:4::2/64
 	ip -4 route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.129
+	ip -4 route add 192.0.2.16/28 vrf v$h2 nexthop via 192.0.2.145
 	ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::1
+	ip -6 route add 2001:db8:3::/64 vrf v$h2 nexthop via 2001:db8:4::1
 }
 
 h2_destroy()
 {
+	ip -6 route del 2001:db8:3::/64 vrf v$h2
 	ip -6 route del 2001:db8:1::/64 vrf v$h2
+	ip -4 route del 192.0.2.16/28 vrf v$h2
 	ip -4 route del 192.0.2.0/28 vrf v$h2
-	simple_if_fini $h2 192.0.2.130/28 2001:db8:2::2/64
+	simple_if_fini $h2 192.0.2.146/28 2001:db8:4::2/64 \
+			   192.0.2.130/28 2001:db8:2::2/64
 }
 
 router_create()
 {
-	ip link add name br1 type bridge vlan_filtering 1
+	ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0
 	ip link set dev br1 up
 
 	ip link set dev $swp1 master br1
@@ -49,18 +98,23 @@ router_create()
 
 	bridge vlan add dev br1 vid 555 self pvid untagged
 	bridge vlan add dev $swp1 vid 555
+	bridge vlan add dev $swp1 vid 777
 
 	__addr_add_del br1 add 192.0.2.2/28 2001:db8:1::2/64
+	__addr_add_del br1 add 192.0.2.18/28 2001:db8:3::2/64
 
 	ip link set dev $swp2 up
 	__addr_add_del $swp2 add 192.0.2.129/28 2001:db8:2::1/64
+	__addr_add_del $swp2 add 192.0.2.145/28 2001:db8:4::1/64
 }
 
 router_destroy()
 {
+	__addr_add_del $swp2 del 192.0.2.145/28 2001:db8:4::1/64
 	__addr_add_del $swp2 del 192.0.2.129/28 2001:db8:2::1/64
 	ip link set dev $swp2 down
 
+	__addr_add_del br1 del 192.0.2.18/28 2001:db8:3::2/64
 	__addr_add_del br1 del 192.0.2.2/28 2001:db8:1::2/64
 	ip link set dev $swp1 down
 	ip link set dev $swp1 nomaster
@@ -86,6 +140,24 @@ setup_prepare()
 	forwarding_enable
 }
 
+config_555()
+{
+	log_info "Configure VLAN 555 as PVID"
+
+	bridge vlan add dev br1 vid 555 self pvid untagged
+	bridge vlan del dev br1 vid 777 self
+	sleep 2
+}
+
+config_777()
+{
+	log_info "Configure VLAN 777 as PVID"
+
+	bridge vlan add dev br1 vid 777 self pvid untagged
+	bridge vlan del dev br1 vid 555 self
+	sleep 2
+}
+
 cleanup()
 {
 	pre_cleanup
@@ -114,12 +186,32 @@ vlan()
 
 ping_ipv4()
 {
-	ping_test $h1 192.0.2.130
+	ping_test $h1.555 192.0.2.130
 }
 
 ping_ipv6()
 {
-	ping6_test $h1 2001:db8:2::2
+	ping6_test $h1.555 2001:db8:2::2
+}
+
+ping_ipv4_fails()
+{
+	ping_test_fails $h1.555 192.0.2.130 ": via 555"
+}
+
+ping_ipv6_fails()
+{
+	ping6_test_fails $h1.555 2001:db8:2::2 ": via 555"
+}
+
+ping_ipv4_777()
+{
+	ping_test $h1.777 192.0.2.146 ": via 777"
+}
+
+ping_ipv6_777()
+{
+	ping6_test $h1.777 2001:db8:4::2 ": via 777"
 }
 
 trap cleanup EXIT
diff --git a/tools/testing/selftests/net/forwarding/router_bridge_vlan_upper.sh b/tools/testing/selftests/net/forwarding/router_bridge_vlan_upper.sh
new file mode 100755
index 000000000000..215309ea1c8c
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_bridge_vlan_upper.sh
@@ -0,0 +1,169 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +------------------------+                           +----------------------+
+# | H1 (vrf)               |                           |             H2 (vrf) |
+# |    + $h1.555           |                           |  + $h2.777           |
+# |    | 192.0.2.1/28      |                           |  | 192.0.2.18/28     |
+# |    | 2001:db8:1::1/64  |                           |  | 2001:db8:2::2/64  |
+# |    |                   |                           |  |                   |
+# |    + $h1               |                           |  + $h2               |
+# +----|-------------------+                           +--|-------------------+
+#      |                                                  |
+# +----|--------------------------------------------------|-------------------+
+# | SW |                                                  |                   |
+# | +--|--------------------------------------------------|-----------------+ |
+# | |  + $swp1                   BR1 (802.1q)             + $swp2           | |
+# | |                                                                       | |
+# | +------+------------------------------------------+---------------------+ |
+# |        |                                          |                       |
+# |        + br1.555                                  + br1.777               |
+# |          192.0.2.2/28                               192.0.2.17/28         |
+# |          2001:db8:1::2/64                           2001:db8:2::1/64      |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	ping_ipv6
+	respin_config
+	ping_ipv4
+	ping_ipv6
+"
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1
+	vlan_create $h1 555 v$h1 192.0.2.1/28 2001:db8:1::1/64
+	ip -4 route add 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
+	ip -6 route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+	ip -6 route del 2001:db8:2::/64 vrf v$h1
+	ip -4 route del 192.0.2.16/28 vrf v$h1
+	vlan_destroy $h1 555
+	simple_if_fini $h1
+}
+
+h2_create()
+{
+	simple_if_init $h2
+	vlan_create $h2 777 v$h2 192.0.2.18/28 2001:db8:2::2/64
+	ip -4 route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.17
+	ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+	ip -6 route del 2001:db8:1::/64 vrf v$h2
+	ip -4 route del 192.0.2.0/28 vrf v$h2
+	vlan_destroy $h2 777
+	simple_if_fini $h2
+}
+
+router_create()
+{
+	ip link add name br1 address $(mac_get $swp1) \
+		type bridge vlan_filtering 1
+	ip link set dev br1 up
+
+	ip link set dev $swp1 master br1
+	ip link set dev $swp2 master br1
+	ip link set dev $swp1 up
+	ip link set dev $swp2 up
+
+	bridge vlan add dev br1 vid 555 self
+	bridge vlan add dev br1 vid 777 self
+	bridge vlan add dev $swp1 vid 555
+	bridge vlan add dev $swp2 vid 777
+
+	vlan_create br1 555 "" 192.0.2.2/28 2001:db8:1::2/64
+	vlan_create br1 777 "" 192.0.2.17/28 2001:db8:2::1/64
+}
+
+router_destroy()
+{
+	vlan_destroy br1 777
+	vlan_destroy br1 555
+
+	bridge vlan del dev $swp2 vid 777
+	bridge vlan del dev $swp1 vid 555
+	bridge vlan del dev br1 vid 777 self
+	bridge vlan del dev br1 vid 555 self
+
+	ip link set dev $swp2 down nomaster
+	ip link set dev $swp1 down nomaster
+
+	ip link set dev br1 down
+	ip link del dev br1
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	router_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	router_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.18
+}
+
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:2::2
+}
+
+respin_config()
+{
+	log_info "Remaster bridge slave"
+
+	ip link set dev $swp2 nomaster
+	ip link set dev $swp1 nomaster
+
+	sleep 2
+
+	ip link set dev $swp1 master br1
+	ip link set dev $swp2 master br1
+
+	bridge vlan add dev $swp1 vid 555
+	bridge vlan add dev $swp2 vid 777
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_bridge_vlan_upper_pvid.sh b/tools/testing/selftests/net/forwarding/router_bridge_vlan_upper_pvid.sh
new file mode 100755
index 000000000000..138558452402
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_bridge_vlan_upper_pvid.sh
@@ -0,0 +1,171 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +----------------------------+
+# |                   H1 (vrf) |
+# |   + $h1.10                 |                      +----------------------+
+# |   | 192.0.2.1/28           |                      |             H2 (vrf) |
+# |   | 2001:db8:1::1/64       |                      |  + $h2               |
+# |   |                        |                      |  | 192.0.2.130/28    |
+# |   + $h1                    |                      |  | 2001:db8:2::2/64  |
+# +---|------------------------+                      +--|-------------------+
+#     |                                                  |
+# +---|--------------------------------------------------|-------------------+
+# |   |                            router (main VRF)     |                   |
+# | +-|--------------------------+                       + $swp2             |
+# | | + $swp1      BR1 (802.1q)  |                         192.0.2.129/28    |
+# | +-----+----------------------+                         2001:db8:2::1/64  |
+# |       |                                                                  |
+# |       + br1.10                                                           |
+# |         192.0.2.2/28                                                     |
+# |         2001:db8:1::2/64                                                 |
+# +--------------------------------------------------------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	ping_ipv6
+	pvid_set_unset
+	ping_ipv4
+	ping_ipv6
+	pvid_set_move
+	ping_ipv4
+	ping_ipv6
+"
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1
+	vlan_create $h1 10 v$h1 192.0.2.1/28 2001:db8:1::1/64
+	ip -4 route add 192.0.2.128/28 vrf v$h1 nexthop via 192.0.2.2
+	ip -6 route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+	ip -6 route del 2001:db8:2::/64 vrf v$h1
+	ip -4 route del 192.0.2.128/28 vrf v$h1
+	vlan_destroy $h1 10
+	simple_if_fini $h1
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.130/28 2001:db8:2::2/64
+	ip -4 route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.129
+	ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+	ip -6 route del 2001:db8:1::/64 vrf v$h2
+	ip -4 route del 192.0.2.0/28 vrf v$h2
+	simple_if_fini $h2 192.0.2.130/28 2001:db8:2::2/64
+}
+
+router_create()
+{
+	ip link add name br1 address $(mac_get $swp1) \
+		type bridge vlan_filtering 1 vlan_default_pvid 0
+	ip link set dev br1 up
+
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+
+	ip link set dev $swp2 up
+	__addr_add_del $swp2 add 192.0.2.129/28 2001:db8:2::1/64
+
+	bridge vlan add dev br1 vid 10 self
+	bridge vlan add dev $swp1 vid 10
+	vlan_create br1 10 "" 192.0.2.2/28 2001:db8:1::2/64
+}
+
+router_destroy()
+{
+	vlan_destroy br1 10
+	bridge vlan del dev $swp1 vid 10
+	bridge vlan del dev br1 vid 10 self
+
+	__addr_add_del $swp2 del 192.0.2.129/28 2001:db8:2::1/64
+	ip link set dev $swp2 down
+
+	ip link set dev $swp1 down
+	ip link set dev $swp1 nomaster
+
+	ip link del dev br1
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	router_create
+
+	forwarding_enable
+}
+
+pvid_set_unset()
+{
+	log_info "Set and unset PVID on VLAN 10"
+
+	bridge vlan add dev br1 vid 10 pvid self
+	sleep 1
+	bridge vlan add dev br1 vid 10 self
+}
+
+pvid_set_move()
+{
+	log_info "Set PVID on VLAN 10, then move it to VLAN 20"
+
+	bridge vlan add dev br1 vid 10 pvid self
+	sleep 1
+	bridge vlan add dev br1 vid 20 pvid self
+}
+
+shuffle_vlan()
+{
+	log_info ""
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	router_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.130
+}
+
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:2::2
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
index cf3d26c233e8..3f0f5dc95542 100755
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
@@ -1,9 +1,18 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-ALL_TESTS="ping_ipv4 ping_ipv6 multipath_test"
+ALL_TESTS="
+	ping_ipv4
+	ping_ipv6
+	multipath_test
+	ping_ipv4_blackhole
+	ping_ipv6_blackhole
+	nh_stats_test_v4
+	nh_stats_test_v6
+"
 NUM_NETIFS=8
 source lib.sh
+source router_mpath_nh_lib.sh
 
 h1_create()
 {
@@ -197,8 +206,8 @@ multipath4_test()
 	t0_rp12=$(link_stats_tx_packets_get $rp12)
 	t0_rp13=$(link_stats_tx_packets_get $rp13)
 
-	ip vrf exec vrf-h1 $MZ -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
-		-d 1msec -t udp "sp=1024,dp=0-32768"
+	ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
+		-d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
 
 	t1_rp12=$(link_stats_tx_packets_get $rp12)
 	t1_rp13=$(link_stats_tx_packets_get $rp13)
@@ -212,7 +221,7 @@ multipath4_test()
 	sysctl_restore net.ipv4.fib_multipath_hash_policy
 }
 
-multipath6_l4_test()
+multipath6_test()
 {
 	local desc="$1"
 	local weight_rp12=$2
@@ -231,7 +240,7 @@ multipath6_l4_test()
 	t0_rp13=$(link_stats_tx_packets_get $rp13)
 
 	$MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
-		-d 1msec -t udp "sp=1024,dp=0-32768"
+		-d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
 
 	t1_rp12=$(link_stats_tx_packets_get $rp12)
 	t1_rp13=$(link_stats_tx_packets_get $rp13)
@@ -245,34 +254,6 @@ multipath6_l4_test()
 	sysctl_restore net.ipv6.fib_multipath_hash_policy
 }
 
-multipath6_test()
-{
-	local desc="$1"
-	local weight_rp12=$2
-	local weight_rp13=$3
-	local t0_rp12 t0_rp13 t1_rp12 t1_rp13
-	local packets_rp12 packets_rp13
-
-	ip nexthop replace id 106 group 104,$weight_rp12/105,$weight_rp13
-
-	t0_rp12=$(link_stats_tx_packets_get $rp12)
-	t0_rp13=$(link_stats_tx_packets_get $rp13)
-
-	# Generate 16384 echo requests, each with a random flow label.
-	for _ in $(seq 1 16384); do
-		ip vrf exec vrf-h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1
-	done
-
-	t1_rp12=$(link_stats_tx_packets_get $rp12)
-	t1_rp13=$(link_stats_tx_packets_get $rp13)
-
-	let "packets_rp12 = $t1_rp12 - $t0_rp12"
-	let "packets_rp13 = $t1_rp13 - $t0_rp13"
-	multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
-
-	ip nexthop replace id 106 group 104/105
-}
-
 multipath_test()
 {
 	log_info "Running IPv4 multipath tests"
@@ -280,15 +261,81 @@ multipath_test()
 	multipath4_test "Weighted MP 2:1" 2 1
 	multipath4_test "Weighted MP 11:45" 11 45
 
+	log_info "Running IPv4 multipath tests with IPv6 link-local nexthops"
+	ip nexthop replace id 101 via fe80:2::22 dev $rp12
+	ip nexthop replace id 102 via fe80:3::23 dev $rp13
+
+	multipath4_test "ECMP" 1 1
+	multipath4_test "Weighted MP 2:1" 2 1
+	multipath4_test "Weighted MP 11:45" 11 45
+
+	ip nexthop replace id 102 via 169.254.3.23 dev $rp13
+	ip nexthop replace id 101 via 169.254.2.22 dev $rp12
+
 	log_info "Running IPv6 multipath tests"
 	multipath6_test "ECMP" 1 1
 	multipath6_test "Weighted MP 2:1" 2 1
 	multipath6_test "Weighted MP 11:45" 11 45
+}
+
+ping_ipv4_blackhole()
+{
+	RET=0
+
+	ip nexthop add id 1001 blackhole
+	ip nexthop add id 1002 group 1001
+
+	ip route replace 198.51.100.0/24 vrf vrf-r1 nhid 1001
+	ping_do $h1 198.51.100.2
+	check_fail $? "ping did not fail when using a blackhole nexthop"
+
+	ip route replace 198.51.100.0/24 vrf vrf-r1 nhid 1002
+	ping_do $h1 198.51.100.2
+	check_fail $? "ping did not fail when using a blackhole nexthop group"
+
+	ip route replace 198.51.100.0/24 vrf vrf-r1 nhid 103
+	ping_do $h1 198.51.100.2
+	check_err $? "ping failed with a valid nexthop"
+
+	log_test "IPv4 blackhole ping"
+
+	ip nexthop del id 1002
+	ip nexthop del id 1001
+}
 
-	log_info "Running IPv6 L4 hash multipath tests"
-	multipath6_l4_test "ECMP" 1 1
-	multipath6_l4_test "Weighted MP 2:1" 2 1
-	multipath6_l4_test "Weighted MP 11:45" 11 45
+ping_ipv6_blackhole()
+{
+	RET=0
+
+	ip -6 nexthop add id 1001 blackhole
+	ip nexthop add id 1002 group 1001
+
+	ip route replace 2001:db8:2::/64 vrf vrf-r1 nhid 1001
+	ping6_do $h1 2001:db8:2::2
+	check_fail $? "ping did not fail when using a blackhole nexthop"
+
+	ip route replace 2001:db8:2::/64 vrf vrf-r1 nhid 1002
+	ping6_do $h1 2001:db8:2::2
+	check_fail $? "ping did not fail when using a blackhole nexthop group"
+
+	ip route replace 2001:db8:2::/64 vrf vrf-r1 nhid 106
+	ping6_do $h1 2001:db8:2::2
+	check_err $? "ping failed with a valid nexthop"
+
+	log_test "IPv6 blackhole ping"
+
+	ip nexthop del id 1002
+	ip -6 nexthop del id 1001
+}
+
+nh_stats_test_v4()
+{
+	__nh_stats_test_v4 mpath
+}
+
+nh_stats_test_v6()
+{
+	__nh_stats_test_v6 mpath
 }
 
 setup_prepare()
@@ -312,7 +359,6 @@ setup_prepare()
 
 	router1_create
 	router2_create
-	routing_nh_obj
 
 	forwarding_enable
 }
@@ -345,7 +391,7 @@ ping_ipv6()
 ip nexthop ls >/dev/null 2>&1
 if [ $? -ne 0 ]; then
 	echo "Nexthop objects not supported; skipping tests"
-	exit 0
+	exit $ksft_skip
 fi
 
 trap cleanup EXIT
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh
new file mode 100644
index 000000000000..7e7d62161c34
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh
@@ -0,0 +1,129 @@
+# SPDX-License-Identifier: GPL-2.0
+
+nh_stats_do_test()
+{
+	local what=$1; shift
+	local nh1_id=$1; shift
+	local nh2_id=$1; shift
+	local group_id=$1; shift
+	local stats_get=$1; shift
+	local mz="$@"
+
+	local dp
+
+	RET=0
+
+	sleep 2
+	for ((dp=0; dp < 60000; dp += 10000)); do
+		local dd
+		local t0_rp12=$(link_stats_tx_packets_get $rp12)
+		local t0_rp13=$(link_stats_tx_packets_get $rp13)
+		local t0_nh1=$($stats_get $group_id $nh1_id)
+		local t0_nh2=$($stats_get $group_id $nh2_id)
+
+		ip vrf exec vrf-h1 \
+			$mz -q -p 64 -d 0 -t udp \
+				"sp=1024,dp=$((dp))-$((dp + 10000))"
+		sleep 2
+
+		local t1_rp12=$(link_stats_tx_packets_get $rp12)
+		local t1_rp13=$(link_stats_tx_packets_get $rp13)
+		local t1_nh1=$($stats_get $group_id $nh1_id)
+		local t1_nh2=$($stats_get $group_id $nh2_id)
+
+		local d_rp12=$((t1_rp12 - t0_rp12))
+		local d_rp13=$((t1_rp13 - t0_rp13))
+		local d_nh1=$((t1_nh1 - t0_nh1))
+		local d_nh2=$((t1_nh2 - t0_nh2))
+
+		dd=$(absval $((d_rp12 - d_nh1)))
+		((dd < 10))
+		check_err $? "Discrepancy between link and $stats_get: d_rp12=$d_rp12 d_nh1=$d_nh1"
+
+		dd=$(absval $((d_rp13 - d_nh2)))
+		((dd < 10))
+		check_err $? "Discrepancy between link and $stats_get: d_rp13=$d_rp13 d_nh2=$d_nh2"
+	done
+
+	log_test "NH stats test $what"
+}
+
+nh_stats_test_dispatch_swhw()
+{
+	local what=$1; shift
+	local nh1_id=$1; shift
+	local nh2_id=$1; shift
+	local group_id=$1; shift
+	local mz="$@"
+
+	local used
+
+	nh_stats_do_test "$what" "$nh1_id" "$nh2_id" "$group_id" \
+			 nh_stats_get "${mz[@]}"
+
+	used=$(ip -s -j -d nexthop show id $group_id |
+		   jq '.[].hw_stats.used')
+	kind=$(ip -j -d link show dev $rp11 |
+		   jq -r '.[].linkinfo.info_kind')
+	if [[ $used == true ]]; then
+		nh_stats_do_test "HW $what" "$nh1_id" "$nh2_id" "$group_id" \
+				 nh_stats_get_hw "${mz[@]}"
+	elif [[ $kind == veth ]]; then
+		log_test_skip "HW stats not offloaded on veth topology"
+	fi
+}
+
+nh_stats_test_dispatch()
+{
+	local nhgtype=$1; shift
+	local what=$1; shift
+	local nh1_id=$1; shift
+	local nh2_id=$1; shift
+	local group_id=$1; shift
+	local mz="$@"
+
+	local enabled
+	local kind
+
+	if ! ip nexthop help 2>&1 | grep -q hw_stats; then
+		log_test_skip "NH stats test: ip doesn't support HW stats"
+		return
+	fi
+
+	ip nexthop replace id $group_id group $nh1_id/$nh2_id \
+			   hw_stats on type $nhgtype
+	enabled=$(ip -s -j -d nexthop show id $group_id |
+		      jq '.[].hw_stats.enabled')
+	if [[ $enabled == true ]]; then
+		nh_stats_test_dispatch_swhw "$what" "$nh1_id" "$nh2_id" \
+					    "$group_id" "${mz[@]}"
+	elif [[ $enabled == false ]]; then
+		check_err 1 "HW stats still disabled after enabling"
+		log_test "NH stats test"
+	else
+		log_test_skip "NH stats test: ip doesn't report hw_stats info"
+	fi
+
+	ip nexthop replace id $group_id group $nh1_id/$nh2_id \
+			   hw_stats off type $nhgtype
+}
+
+__nh_stats_test_v4()
+{
+	local nhgtype=$1; shift
+
+	sysctl_set net.ipv4.fib_multipath_hash_policy 1
+	nh_stats_test_dispatch $nhgtype "IPv4" 101 102 103 \
+			       $MZ $h1 -A 192.0.2.2 -B 198.51.100.2
+	sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+__nh_stats_test_v6()
+{
+	local nhgtype=$1; shift
+
+	sysctl_set net.ipv6.fib_multipath_hash_policy 1
+	nh_stats_test_dispatch $nhgtype "IPv6" 104 105 106 \
+			       $MZ -6 $h1 -A 2001:db8:1::2 -B 2001:db8:2::2
+	sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
new file mode 100755
index 000000000000..4b483d24ad00
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
@@ -0,0 +1,413 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+	ping_ipv4
+	ping_ipv6
+	multipath_test
+	nh_stats_test_v4
+	nh_stats_test_v6
+"
+NUM_NETIFS=8
+source lib.sh
+source router_mpath_nh_lib.sh
+
+h1_create()
+{
+	vrf_create "vrf-h1"
+	ip link set dev $h1 master vrf-h1
+
+	ip link set dev vrf-h1 up
+	ip link set dev $h1 up
+
+	ip address add 192.0.2.2/24 dev $h1
+	ip address add 2001:db8:1::2/64 dev $h1
+
+	ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+	ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1
+}
+
+h1_destroy()
+{
+	ip route del 2001:db8:2::/64 vrf vrf-h1
+	ip route del 198.51.100.0/24 vrf vrf-h1
+
+	ip address del 2001:db8:1::2/64 dev $h1
+	ip address del 192.0.2.2/24 dev $h1
+
+	ip link set dev $h1 down
+	vrf_destroy "vrf-h1"
+}
+
+h2_create()
+{
+	vrf_create "vrf-h2"
+	ip link set dev $h2 master vrf-h2
+
+	ip link set dev vrf-h2 up
+	ip link set dev $h2 up
+
+	ip address add 198.51.100.2/24 dev $h2
+	ip address add 2001:db8:2::2/64 dev $h2
+
+	ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+	ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+	ip route del 2001:db8:1::/64 vrf vrf-h2
+	ip route del 192.0.2.0/24 vrf vrf-h2
+
+	ip address del 2001:db8:2::2/64 dev $h2
+	ip address del 198.51.100.2/24 dev $h2
+
+	ip link set dev $h2 down
+	vrf_destroy "vrf-h2"
+}
+
+router1_create()
+{
+	vrf_create "vrf-r1"
+	ip link set dev $rp11 master vrf-r1
+	ip link set dev $rp12 master vrf-r1
+	ip link set dev $rp13 master vrf-r1
+
+	ip link set dev vrf-r1 up
+	ip link set dev $rp11 up
+	ip link set dev $rp12 up
+	ip link set dev $rp13 up
+
+	ip address add 192.0.2.1/24 dev $rp11
+	ip address add 2001:db8:1::1/64 dev $rp11
+
+	ip address add 169.254.2.12/24 dev $rp12
+	ip address add fe80:2::12/64 dev $rp12
+
+	ip address add 169.254.3.13/24 dev $rp13
+	ip address add fe80:3::13/64 dev $rp13
+}
+
+router1_destroy()
+{
+	ip route del 2001:db8:2::/64 vrf vrf-r1
+	ip route del 198.51.100.0/24 vrf vrf-r1
+
+	ip address del fe80:3::13/64 dev $rp13
+	ip address del 169.254.3.13/24 dev $rp13
+
+	ip address del fe80:2::12/64 dev $rp12
+	ip address del 169.254.2.12/24 dev $rp12
+
+	ip address del 2001:db8:1::1/64 dev $rp11
+	ip address del 192.0.2.1/24 dev $rp11
+
+	ip nexthop del id 103
+	ip nexthop del id 101
+	ip nexthop del id 102
+	ip nexthop del id 106
+	ip nexthop del id 104
+	ip nexthop del id 105
+
+	ip link set dev $rp13 down
+	ip link set dev $rp12 down
+	ip link set dev $rp11 down
+
+	vrf_destroy "vrf-r1"
+}
+
+router2_create()
+{
+	vrf_create "vrf-r2"
+	ip link set dev $rp21 master vrf-r2
+	ip link set dev $rp22 master vrf-r2
+	ip link set dev $rp23 master vrf-r2
+
+	ip link set dev vrf-r2 up
+	ip link set dev $rp21 up
+	ip link set dev $rp22 up
+	ip link set dev $rp23 up
+
+	ip address add 198.51.100.1/24 dev $rp21
+	ip address add 2001:db8:2::1/64 dev $rp21
+
+	ip address add 169.254.2.22/24 dev $rp22
+	ip address add fe80:2::22/64 dev $rp22
+
+	ip address add 169.254.3.23/24 dev $rp23
+	ip address add fe80:3::23/64 dev $rp23
+}
+
+router2_destroy()
+{
+	ip route del 2001:db8:1::/64 vrf vrf-r2
+	ip route del 192.0.2.0/24 vrf vrf-r2
+
+	ip address del fe80:3::23/64 dev $rp23
+	ip address del 169.254.3.23/24 dev $rp23
+
+	ip address del fe80:2::22/64 dev $rp22
+	ip address del 169.254.2.22/24 dev $rp22
+
+	ip address del 2001:db8:2::1/64 dev $rp21
+	ip address del 198.51.100.1/24 dev $rp21
+
+	ip nexthop del id 201
+	ip nexthop del id 202
+	ip nexthop del id 204
+	ip nexthop del id 205
+
+	ip link set dev $rp23 down
+	ip link set dev $rp22 down
+	ip link set dev $rp21 down
+
+	vrf_destroy "vrf-r2"
+}
+
+routing_nh_obj()
+{
+	ip nexthop add id 101 via 169.254.2.22 dev $rp12
+	ip nexthop add id 102 via 169.254.3.23 dev $rp13
+	ip nexthop add id 103 group 101/102 type resilient buckets 512 \
+		idle_timer 0
+	ip route add 198.51.100.0/24 vrf vrf-r1 nhid 103
+
+	ip nexthop add id 104 via fe80:2::22 dev $rp12
+	ip nexthop add id 105 via fe80:3::23 dev $rp13
+	ip nexthop add id 106 group 104/105 type resilient buckets 512 \
+		idle_timer 0
+	ip route add 2001:db8:2::/64 vrf vrf-r1 nhid 106
+
+	ip nexthop add id 201 via 169.254.2.12 dev $rp22
+	ip nexthop add id 202 via 169.254.3.13 dev $rp23
+	ip nexthop add id 203 group 201/202 type resilient buckets 512 \
+		idle_timer 0
+	ip route add 192.0.2.0/24 vrf vrf-r2 nhid 203
+
+	ip nexthop add id 204 via fe80:2::12 dev $rp22
+	ip nexthop add id 205 via fe80:3::13 dev $rp23
+	ip nexthop add id 206 group 204/205 type resilient buckets 512 \
+		idle_timer 0
+	ip route add 2001:db8:1::/64 vrf vrf-r2 nhid 206
+}
+
+multipath4_test()
+{
+	local desc="$1"
+	local weight_rp12=$2
+	local weight_rp13=$3
+	local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+	local packets_rp12 packets_rp13
+
+	# Transmit multiple flows from h1 to h2 and make sure they are
+	# distributed between both multipath links (rp12 and rp13)
+	# according to the provided weights.
+	sysctl_set net.ipv4.fib_multipath_hash_policy 1
+
+	t0_rp12=$(link_stats_tx_packets_get $rp12)
+	t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+	ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
+		-d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
+
+	t1_rp12=$(link_stats_tx_packets_get $rp12)
+	t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+	let "packets_rp12 = $t1_rp12 - $t0_rp12"
+	let "packets_rp13 = $t1_rp13 - $t0_rp13"
+	multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+	# Restore settings.
+	sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+multipath6_l4_test()
+{
+	local desc="$1"
+	local weight_rp12=$2
+	local weight_rp13=$3
+	local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+	local packets_rp12 packets_rp13
+
+	# Transmit multiple flows from h1 to h2 and make sure they are
+	# distributed between both multipath links (rp12 and rp13)
+	# according to the provided weights.
+	sysctl_set net.ipv6.fib_multipath_hash_policy 1
+
+	t0_rp12=$(link_stats_tx_packets_get $rp12)
+	t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+	$MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
+		-d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
+
+	t1_rp12=$(link_stats_tx_packets_get $rp12)
+	t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+	let "packets_rp12 = $t1_rp12 - $t0_rp12"
+	let "packets_rp13 = $t1_rp13 - $t0_rp13"
+	multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+	sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+multipath_test()
+{
+	# Without an idle timer, weight replacement should happen immediately.
+	log_info "Running multipath tests without an idle timer"
+	ip nexthop replace id 103 group 101/102 type resilient idle_timer 0
+	ip nexthop replace id 106 group 104/105 type resilient idle_timer 0
+
+	log_info "Running IPv4 multipath tests"
+	ip nexthop replace id 103 group 101,1/102,1 type resilient
+	multipath4_test "ECMP" 1 1
+	ip nexthop replace id 103 group 101,2/102,1 type resilient
+	multipath4_test "Weighted MP 2:1" 2 1
+	ip nexthop replace id 103 group 101,11/102,45 type resilient
+	multipath4_test "Weighted MP 11:45" 11 45
+
+	ip nexthop replace id 103 group 101,1/102,1 type resilient
+
+	log_info "Running IPv6 L4 hash multipath tests"
+	ip nexthop replace id 106 group 104,1/105,1 type resilient
+	multipath6_l4_test "ECMP" 1 1
+	ip nexthop replace id 106 group 104,2/105,1 type resilient
+	multipath6_l4_test "Weighted MP 2:1" 2 1
+	ip nexthop replace id 106 group 104,11/105,45 type resilient
+	multipath6_l4_test "Weighted MP 11:45" 11 45
+
+	ip nexthop replace id 106 group 104,1/105,1 type resilient
+
+	# With an idle timer, weight replacement should not happen, so the
+	# expected ratio should always be the initial one (1:1).
+	log_info "Running multipath tests with an idle timer of 120 seconds"
+	ip nexthop replace id 103 group 101/102 type resilient idle_timer 120
+	ip nexthop replace id 106 group 104/105 type resilient idle_timer 120
+
+	log_info "Running IPv4 multipath tests"
+	ip nexthop replace id 103 group 101,1/102,1 type resilient
+	multipath4_test "ECMP" 1 1
+	ip nexthop replace id 103 group 101,2/102,1 type resilient
+	multipath4_test "Weighted MP 2:1" 1 1
+	ip nexthop replace id 103 group 101,11/102,45 type resilient
+	multipath4_test "Weighted MP 11:45" 1 1
+
+	ip nexthop replace id 103 group 101,1/102,1 type resilient
+
+	log_info "Running IPv6 L4 hash multipath tests"
+	ip nexthop replace id 106 group 104,1/105,1 type resilient
+	multipath6_l4_test "ECMP" 1 1
+	ip nexthop replace id 106 group 104,2/105,1 type resilient
+	multipath6_l4_test "Weighted MP 2:1" 1 1
+	ip nexthop replace id 106 group 104,11/105,45 type resilient
+	multipath6_l4_test "Weighted MP 11:45" 1 1
+
+	ip nexthop replace id 106 group 104,1/105,1 type resilient
+
+	# With a short idle timer and enough idle time, weight replacement
+	# should happen.
+	log_info "Running multipath tests with an idle timer of 5 seconds"
+	ip nexthop replace id 103 group 101/102 type resilient idle_timer 5
+	ip nexthop replace id 106 group 104/105 type resilient idle_timer 5
+
+	log_info "Running IPv4 multipath tests"
+	sleep 10
+	ip nexthop replace id 103 group 101,1/102,1 type resilient
+	multipath4_test "ECMP" 1 1
+	sleep 10
+	ip nexthop replace id 103 group 101,2/102,1 type resilient
+	multipath4_test "Weighted MP 2:1" 2 1
+	sleep 10
+	ip nexthop replace id 103 group 101,11/102,45 type resilient
+	multipath4_test "Weighted MP 11:45" 11 45
+
+	ip nexthop replace id 103 group 101,1/102,1 type resilient
+
+	log_info "Running IPv6 L4 hash multipath tests"
+	sleep 10
+	ip nexthop replace id 106 group 104,1/105,1 type resilient
+	multipath6_l4_test "ECMP" 1 1
+	sleep 10
+	ip nexthop replace id 106 group 104,2/105,1 type resilient
+	multipath6_l4_test "Weighted MP 2:1" 2 1
+	sleep 10
+	ip nexthop replace id 106 group 104,11/105,45 type resilient
+	multipath6_l4_test "Weighted MP 11:45" 11 45
+
+	ip nexthop replace id 106 group 104,1/105,1 type resilient
+}
+
+nh_stats_test_v4()
+{
+	__nh_stats_test_v4 resilient
+}
+
+nh_stats_test_v6()
+{
+	__nh_stats_test_v6 resilient
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	rp11=${NETIFS[p2]}
+
+	rp12=${NETIFS[p3]}
+	rp22=${NETIFS[p4]}
+
+	rp13=${NETIFS[p5]}
+	rp23=${NETIFS[p6]}
+
+	rp21=${NETIFS[p7]}
+	h2=${NETIFS[p8]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	router1_create
+	router2_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	router2_destroy
+	router1_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 198.51.100.2
+}
+
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:2::2
+}
+
+ip nexthop ls >/dev/null 2>&1
+if [ $? -ne 0 ]; then
+	echo "Nexthop objects not supported; skipping tests"
+	exit $ksft_skip
+fi
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+routing_nh_obj
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_multicast.sh b/tools/testing/selftests/net/forwarding/router_multicast.sh
index 57e90c873a2c..5a58b1ec8aef 100755
--- a/tools/testing/selftests/net/forwarding/router_multicast.sh
+++ b/tools/testing/selftests/net/forwarding/router_multicast.sh
@@ -28,7 +28,7 @@
 # +------------------+       +------------------+
 #
 
-ALL_TESTS="mcast_v4 mcast_v6 rpf_v4 rpf_v6"
+ALL_TESTS="mcast_v4 mcast_v6 rpf_v4 rpf_v6 unres_v4 unres_v6"
 NUM_NETIFS=6
 source lib.sh
 source tc_common.sh
@@ -406,6 +406,96 @@ rpf_v6()
 	log_test "RPF IPv6"
 }
 
+unres_v4()
+{
+	# Send a multicast packet not corresponding to an installed route,
+	# causing the kernel to queue the packet for resolution and emit an
+	# IGMPMSG_NOCACHE notification. smcrouted will react to this
+	# notification by consulting its (*, G) list and installing an (S, G)
+	# route, which will be used to forward the queued packet.
+
+	RET=0
+
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 1 flower \
+		dst_ip 225.1.2.3 ip_proto udp dst_port 12345 action drop
+	tc filter add dev $h3 ingress protocol ip pref 1 handle 1 flower \
+		dst_ip 225.1.2.3 ip_proto udp dst_port 12345 action drop
+
+	# Forwarding should fail before installing a matching (*, G).
+	$MZ $h1 -c 1 -p 128 -t udp "ttl=10,sp=54321,dp=12345" \
+		-a 00:11:22:33:44:55 -b 01:00:5e:01:02:03 \
+		-A 198.51.100.2 -B 225.1.2.3 -q
+
+	tc_check_packets "dev $h2 ingress" 1 0
+	check_err $? "Multicast received on first host when should not"
+	tc_check_packets "dev $h3 ingress" 1 0
+	check_err $? "Multicast received on second host when should not"
+
+	# Create (*, G). Will not be installed in the kernel.
+	create_mcast_sg $rp1 0.0.0.0 225.1.2.3 $rp2 $rp3
+
+	$MZ $h1 -c 1 -p 128 -t udp "ttl=10,sp=54321,dp=12345" \
+		-a 00:11:22:33:44:55 -b 01:00:5e:01:02:03 \
+		-A 198.51.100.2 -B 225.1.2.3 -q
+
+	tc_check_packets "dev $h2 ingress" 1 1
+	check_err $? "Multicast not received on first host"
+	tc_check_packets "dev $h3 ingress" 1 1
+	check_err $? "Multicast not received on second host"
+
+	delete_mcast_sg $rp1 0.0.0.0 225.1.2.3 $rp2 $rp3
+
+	tc filter del dev $h3 ingress protocol ip pref 1 handle 1 flower
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 1 flower
+
+	log_test "Unresolved queue IPv4"
+}
+
+unres_v6()
+{
+	# Send a multicast packet not corresponding to an installed route,
+	# causing the kernel to queue the packet for resolution and emit an
+	# MRT6MSG_NOCACHE notification. smcrouted will react to this
+	# notification by consulting its (*, G) list and installing an (S, G)
+	# route, which will be used to forward the queued packet.
+
+	RET=0
+
+	tc filter add dev $h2 ingress protocol ipv6 pref 1 handle 1 flower \
+		dst_ip ff0e::3 ip_proto udp dst_port 12345 action drop
+	tc filter add dev $h3 ingress protocol ipv6 pref 1 handle 1 flower \
+		dst_ip ff0e::3 ip_proto udp dst_port 12345 action drop
+
+	# Forwarding should fail before installing a matching (*, G).
+	$MZ $h1 -6 -c 1 -p 128 -t udp "ttl=10,sp=54321,dp=12345" \
+		-a 00:11:22:33:44:55 -b 33:33:00:00:00:03 \
+		-A 2001:db8:1::2 -B ff0e::3 -q
+
+	tc_check_packets "dev $h2 ingress" 1 0
+	check_err $? "Multicast received on first host when should not"
+	tc_check_packets "dev $h3 ingress" 1 0
+	check_err $? "Multicast received on second host when should not"
+
+	# Create (*, G). Will not be installed in the kernel.
+	create_mcast_sg $rp1 :: ff0e::3 $rp2 $rp3
+
+	$MZ $h1 -6 -c 1 -p 128 -t udp "ttl=10,sp=54321,dp=12345" \
+		-a 00:11:22:33:44:55 -b 33:33:00:00:00:03 \
+		-A 2001:db8:1::2 -B ff0e::3 -q
+
+	tc_check_packets "dev $h2 ingress" 1 1
+	check_err $? "Multicast not received on first host"
+	tc_check_packets "dev $h3 ingress" 1 1
+	check_err $? "Multicast not received on second host"
+
+	delete_mcast_sg $rp1 :: ff0e::3 $rp2 $rp3
+
+	tc filter del dev $h3 ingress protocol ipv6 pref 1 handle 1 flower
+	tc filter del dev $h2 ingress protocol ipv6 pref 1 handle 1 flower
+
+	log_test "Unresolved queue IPv6"
+}
+
 trap cleanup EXIT
 
 setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh
index 79a209927962..e2be354167a1 100755
--- a/tools/testing/selftests/net/forwarding/router_multipath.sh
+++ b/tools/testing/selftests/net/forwarding/router_multipath.sh
@@ -178,8 +178,8 @@ multipath4_test()
        t0_rp12=$(link_stats_tx_packets_get $rp12)
        t0_rp13=$(link_stats_tx_packets_get $rp13)
 
-       ip vrf exec vrf-h1 $MZ -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
-	       -d 1msec -t udp "sp=1024,dp=0-32768"
+       ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
+	       -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
 
        t1_rp12=$(link_stats_tx_packets_get $rp12)
        t1_rp13=$(link_stats_tx_packets_get $rp13)
@@ -195,7 +195,7 @@ multipath4_test()
        sysctl_restore net.ipv4.fib_multipath_hash_policy
 }
 
-multipath6_l4_test()
+multipath6_test()
 {
        local desc="$1"
        local weight_rp12=$2
@@ -216,7 +216,7 @@ multipath6_l4_test()
        t0_rp13=$(link_stats_tx_packets_get $rp13)
 
        $MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
-	       -d 1msec -t udp "sp=1024,dp=0-32768"
+	       -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
 
        t1_rp12=$(link_stats_tx_packets_get $rp12)
        t1_rp13=$(link_stats_tx_packets_get $rp13)
@@ -232,38 +232,6 @@ multipath6_l4_test()
        sysctl_restore net.ipv6.fib_multipath_hash_policy
 }
 
-multipath6_test()
-{
-       local desc="$1"
-       local weight_rp12=$2
-       local weight_rp13=$3
-       local t0_rp12 t0_rp13 t1_rp12 t1_rp13
-       local packets_rp12 packets_rp13
-
-       ip route replace 2001:db8:2::/64 vrf vrf-r1 \
-	       nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \
-	       nexthop via fe80:3::23 dev $rp13 weight $weight_rp13
-
-       t0_rp12=$(link_stats_tx_packets_get $rp12)
-       t0_rp13=$(link_stats_tx_packets_get $rp13)
-
-       # Generate 16384 echo requests, each with a random flow label.
-       for _ in $(seq 1 16384); do
-	       ip vrf exec vrf-h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null
-       done
-
-       t1_rp12=$(link_stats_tx_packets_get $rp12)
-       t1_rp13=$(link_stats_tx_packets_get $rp13)
-
-       let "packets_rp12 = $t1_rp12 - $t0_rp12"
-       let "packets_rp13 = $t1_rp13 - $t0_rp13"
-       multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
-
-       ip route replace 2001:db8:2::/64 vrf vrf-r1 \
-	       nexthop via fe80:2::22 dev $rp12 \
-	       nexthop via fe80:3::23 dev $rp13
-}
-
 multipath_test()
 {
 	log_info "Running IPv4 multipath tests"
@@ -275,11 +243,6 @@ multipath_test()
 	multipath6_test "ECMP" 1 1
 	multipath6_test "Weighted MP 2:1" 2 1
 	multipath6_test "Weighted MP 11:45" 11 45
-
-	log_info "Running IPv6 L4 hash multipath tests"
-	multipath6_l4_test "ECMP" 1 1
-	multipath6_l4_test "Weighted MP 2:1" 2 1
-	multipath6_l4_test "Weighted MP 11:45" 11 45
 }
 
 setup_prepare()
diff --git a/tools/testing/selftests/net/forwarding/router_nh.sh b/tools/testing/selftests/net/forwarding/router_nh.sh
new file mode 100755
index 000000000000..f3a53738bdcc
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_nh.sh
@@ -0,0 +1,160 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+	ping_ipv4
+	ping_ipv6
+"
+
+NUM_NETIFS=4
+source lib.sh
+source tc_common.sh
+
+h1_create()
+{
+	vrf_create "vrf-h1"
+	ip link set dev $h1 master vrf-h1
+
+	ip link set dev vrf-h1 up
+	ip link set dev $h1 up
+
+	ip address add 192.0.2.2/24 dev $h1
+	ip address add 2001:db8:1::2/64 dev $h1
+
+	ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+	ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1
+}
+
+h1_destroy()
+{
+	ip route del 2001:db8:2::/64 vrf vrf-h1
+	ip route del 198.51.100.0/24 vrf vrf-h1
+
+	ip address del 2001:db8:1::2/64 dev $h1
+	ip address del 192.0.2.2/24 dev $h1
+
+	ip link set dev $h1 down
+	vrf_destroy "vrf-h1"
+}
+
+h2_create()
+{
+	vrf_create "vrf-h2"
+	ip link set dev $h2 master vrf-h2
+
+	ip link set dev vrf-h2 up
+	ip link set dev $h2 up
+
+	ip address add 198.51.100.2/24 dev $h2
+	ip address add 2001:db8:2::2/64 dev $h2
+
+	ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+	ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+	ip route del 2001:db8:1::/64 vrf vrf-h2
+	ip route del 192.0.2.0/24 vrf vrf-h2
+
+	ip address del 2001:db8:2::2/64 dev $h2
+	ip address del 198.51.100.2/24 dev $h2
+
+	ip link set dev $h2 down
+	vrf_destroy "vrf-h2"
+}
+
+router_create()
+{
+	ip link set dev $rp1 up
+	ip link set dev $rp2 up
+
+	tc qdisc add dev $rp2 clsact
+
+	ip address add 192.0.2.1/24 dev $rp1
+	ip address add 2001:db8:1::1/64 dev $rp1
+
+	ip address add 198.51.100.1/24 dev $rp2
+	ip address add 2001:db8:2::1/64 dev $rp2
+}
+
+router_destroy()
+{
+	ip address del 2001:db8:2::1/64 dev $rp2
+	ip address del 198.51.100.1/24 dev $rp2
+
+	ip address del 2001:db8:1::1/64 dev $rp1
+	ip address del 192.0.2.1/24 dev $rp1
+
+	tc qdisc del dev $rp2 clsact
+
+	ip link set dev $rp2 down
+	ip link set dev $rp1 down
+}
+
+routing_nh_obj()
+{
+	# Create the nexthops as AF_INET6, so that IPv4 and IPv6 routes could
+	# use them.
+	ip -6 nexthop add id 101 dev $rp1
+	ip -6 nexthop add id 102 dev $rp2
+
+	ip route replace 192.0.2.0/24 nhid 101
+	ip route replace 2001:db8:1::/64 nhid 101
+	ip route replace 198.51.100.0/24 nhid 102
+	ip route replace 2001:db8:2::/64 nhid 102
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	rp1=${NETIFS[p2]}
+
+	rp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	rp1mac=$(mac_get $rp1)
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	router_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	router_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 198.51.100.2
+}
+
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:2::2
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+routing_nh_obj
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_vid_1.sh b/tools/testing/selftests/net/forwarding/router_vid_1.sh
index a7306c7ac06d..865c9f7d8143 100755
--- a/tools/testing/selftests/net/forwarding/router_vid_1.sh
+++ b/tools/testing/selftests/net/forwarding/router_vid_1.sh
@@ -1,7 +1,32 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-ALL_TESTS="ping_ipv4 ping_ipv6"
+# +--------------------+                     +----------------------+
+# | H1                 |                     |                   H2 |
+# |                    |                     |                      |
+# |            $h1.1 + |                     | + $h2.1              |
+# |     192.0.2.2/24 | |                     | | 198.51.100.2/24    |
+# | 2001:db8:1::2/64 | |                     | | 2001:db8:2::2/64   |
+# |                  | |                     | |                    |
+# |              $h1 + |                     | + $h2                |
+# |                  | |                     | |                    |
+# +------------------|-+                     +-|--------------------+
+#                    |                         |
+# +------------------|-------------------------|--------------------+
+# | SW               |                         |                    |
+# |                  |                         |                    |
+# |             $rp1 +                         + $rp2               |
+# |                  |                         |                    |
+# |           $rp1.1 +                         + $rp2.1             |
+# |     192.0.2.1/24                             198.51.100.1/24    |
+# | 2001:db8:1::1/64                             2001:db8:2::1/64   |
+# |                                                                 |
+# +-----------------------------------------------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	ping_ipv6
+"
 NUM_NETIFS=4
 source lib.sh
 
diff --git a/tools/testing/selftests/net/forwarding/sch_red.sh b/tools/testing/selftests/net/forwarding/sch_red.sh
index e714bae473fb..81f31179ac88 100755
--- a/tools/testing/selftests/net/forwarding/sch_red.sh
+++ b/tools/testing/selftests/net/forwarding/sch_red.sh
@@ -1,3 +1,4 @@
+#!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
 # This test sends one stream of traffic from H1 through a TBF shaper, to a RED
diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh b/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh
index 8bd85da1905a..df9bcd6a811a 100644
--- a/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh
+++ b/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh
@@ -4,9 +4,12 @@
 ALL_TESTS="
 	ping_ipv4
 	tbf_test
+	tbf_root_test
 "
 source $lib_dir/sch_tbf_core.sh
 
+QDISC_TYPE=${QDISC% *}
+
 tbf_test_one()
 {
 	local bs=$1; shift
@@ -22,6 +25,8 @@ tbf_test_one()
 
 tbf_test()
 {
+	log_info "Testing root-$QDISC_TYPE-tbf"
+
 	# This test is used for both ETS and PRIO. Even though we only need two
 	# bands, PRIO demands a minimum of three.
 	tc qdisc add dev $swp2 root handle 10: $QDISC 3 priomap 2 1 0
@@ -29,6 +34,33 @@ tbf_test()
 	tc qdisc del dev $swp2 root
 }
 
+tbf_root_test()
+{
+	local bs=128K
+
+	log_info "Testing root-tbf-$QDISC_TYPE"
+
+	tc qdisc replace dev $swp2 root handle 1: \
+		tbf rate 400Mbit burst $bs limit 1M
+	tc qdisc replace dev $swp2 parent 1:1 handle 10: \
+		$QDISC 3 priomap 2 1 0
+	tc qdisc replace dev $swp2 parent 10:3 handle 103: \
+		bfifo limit 1M
+	tc qdisc replace dev $swp2 parent 10:2 handle 102: \
+		bfifo limit 1M
+	tc qdisc replace dev $swp2 parent 10:1 handle 101: \
+		bfifo limit 1M
+
+	do_tbf_test 10 400 $bs
+	do_tbf_test 11 400 $bs
+
+	tc qdisc del dev $swp2 root
+}
+
+if type -t sch_tbf_pre_hook >/dev/null; then
+	sch_tbf_pre_hook
+fi
+
 trap cleanup EXIT
 
 setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_root.sh b/tools/testing/selftests/net/forwarding/sch_tbf_root.sh
index 72aa21ba88c7..96c997be0d03 100755
--- a/tools/testing/selftests/net/forwarding/sch_tbf_root.sh
+++ b/tools/testing/selftests/net/forwarding/sch_tbf_root.sh
@@ -23,6 +23,10 @@ tbf_test()
 	tc qdisc del dev $swp2 root
 }
 
+if type -t sch_tbf_pre_hook >/dev/null; then
+	sch_tbf_pre_hook
+fi
+
 trap cleanup EXIT
 
 setup_prepare
diff --git a/tools/testing/selftests/powerpc/dscr/settings b/tools/testing/selftests/net/forwarding/settings
index e7b9417537fb..e7b9417537fb 100644
--- a/tools/testing/selftests/powerpc/dscr/settings
+++ b/tools/testing/selftests/net/forwarding/settings
diff --git a/tools/testing/selftests/net/forwarding/skbedit_priority.sh b/tools/testing/selftests/net/forwarding/skbedit_priority.sh
index e3bd8a6bb8b4..3dd5fcbd3eaa 100755
--- a/tools/testing/selftests/net/forwarding/skbedit_priority.sh
+++ b/tools/testing/selftests/net/forwarding/skbedit_priority.sh
@@ -54,7 +54,9 @@ h2_destroy()
 
 switch_create()
 {
-	ip link add name br1 up type bridge vlan_filtering 1
+	ip link add name br1 type bridge vlan_filtering 1
+	ip link set dev br1 addrgenmode none
+	ip link set dev br1 up
 	ip link set dev $swp1 master br1
 	ip link set dev $swp1 up
 	ip link set dev $swp2 master br1
@@ -72,7 +74,9 @@ switch_destroy()
 	tc qdisc del dev $swp2 clsact
 	tc qdisc del dev $swp1 clsact
 
+	ip link set dev $swp2 down
 	ip link set dev $swp2 nomaster
+	ip link set dev $swp1 down
 	ip link set dev $swp1 nomaster
 	ip link del dev br1
 }
diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
index d9eca227136b..589629636502 100755
--- a/tools/testing/selftests/net/forwarding/tc_actions.sh
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
@@ -3,20 +3,25 @@
 
 ALL_TESTS="gact_drop_and_ok_test mirred_egress_redirect_test \
 	mirred_egress_mirror_test matchall_mirred_egress_mirror_test \
-	gact_trap_test"
+	gact_trap_test mirred_egress_to_ingress_test \
+	mirred_egress_to_ingress_tcp_test"
 NUM_NETIFS=4
 source tc_common.sh
 source lib.sh
 
+require_command ncat
+
 tcflags="skip_hw"
 
 h1_create()
 {
 	simple_if_init $h1 192.0.2.1/24
+	tc qdisc add dev $h1 clsact
 }
 
 h1_destroy()
 {
+	tc qdisc del dev $h1 clsact
 	simple_if_fini $h1 192.0.2.1/24
 }
 
@@ -58,7 +63,7 @@ mirred_egress_test()
 	RET=0
 
 	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
-		$tcflags dst_ip 192.0.2.2 action drop
+		dst_ip 192.0.2.2 action drop
 
 	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
 		-t ip -q
@@ -153,6 +158,92 @@ gact_trap_test()
 	log_test "trap ($tcflags)"
 }
 
+mirred_egress_to_ingress_test()
+{
+	RET=0
+
+	tc filter add dev $h1 protocol ip pref 100 handle 100 egress flower \
+		ip_proto icmp src_ip 192.0.2.1 dst_ip 192.0.2.2 type 8 action \
+			ct commit nat src addr 192.0.2.2 pipe \
+			ct clear pipe \
+			ct commit nat dst addr 192.0.2.1 pipe \
+			mirred ingress redirect dev $h1
+
+	tc filter add dev $swp1 protocol ip pref 11 handle 111 ingress flower \
+		ip_proto icmp src_ip 192.0.2.1 dst_ip 192.0.2.2 type 8 action drop
+	tc filter add dev $swp1 protocol ip pref 12 handle 112 ingress flower \
+		ip_proto icmp src_ip 192.0.2.1 dst_ip 192.0.2.2 type 0 action pass
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t icmp "ping,id=42,seq=10" -q
+
+	tc_check_packets "dev $h1 egress" 100 1
+	check_err $? "didn't mirror first packet"
+
+	tc_check_packets "dev $swp1 ingress" 111 1
+	check_fail $? "didn't redirect first packet"
+	tc_check_packets "dev $swp1 ingress" 112 1
+	check_err $? "didn't receive reply to first packet"
+
+	ping 192.0.2.2 -I$h1 -c1 -w1 -q 1>/dev/null 2>&1
+
+	tc_check_packets "dev $h1 egress" 100 2
+	check_err $? "didn't mirror second packet"
+	tc_check_packets "dev $swp1 ingress" 111 1
+	check_fail $? "didn't redirect second packet"
+	tc_check_packets "dev $swp1 ingress" 112 2
+	check_err $? "didn't receive reply to second packet"
+
+	tc filter del dev $h1 egress protocol ip pref 100 handle 100 flower
+	tc filter del dev $swp1 ingress protocol ip pref 11 handle 111 flower
+	tc filter del dev $swp1 ingress protocol ip pref 12 handle 112 flower
+
+	log_test "mirred_egress_to_ingress ($tcflags)"
+}
+
+mirred_egress_to_ingress_tcp_test()
+{
+	mirred_e2i_tf1=$(mktemp) mirred_e2i_tf2=$(mktemp)
+
+	RET=0
+	dd conv=sparse status=none if=/dev/zero bs=1M count=2 of=$mirred_e2i_tf1
+	tc filter add dev $h1 protocol ip pref 100 handle 100 egress flower \
+		$tcflags ip_proto tcp src_ip 192.0.2.1 dst_ip 192.0.2.2 \
+			action ct commit nat src addr 192.0.2.2 pipe \
+			action ct clear pipe \
+			action ct commit nat dst addr 192.0.2.1 pipe \
+			action ct clear pipe \
+			action skbedit ptype host pipe \
+			action mirred ingress redirect dev $h1
+	tc filter add dev $h1 protocol ip pref 101 handle 101 egress flower \
+		$tcflags ip_proto icmp \
+			action mirred ingress redirect dev $h1
+	tc filter add dev $h1 protocol ip pref 102 handle 102 ingress flower \
+		ip_proto icmp \
+			action drop
+
+	ip vrf exec v$h1 ncat --recv-only -w10 -l -p 12345 -o $mirred_e2i_tf2 &
+	local rpid=$!
+	ip vrf exec v$h1 ncat -w1 --send-only 192.0.2.2 12345 <$mirred_e2i_tf1
+	wait -n $rpid
+	cmp -s $mirred_e2i_tf1 $mirred_e2i_tf2
+	check_err $? "server output check failed"
+
+	$MZ $h1 -c 10 -p 64 -a $h1mac -b $h1mac -A 192.0.2.1 -B 192.0.2.1 \
+		-t icmp "ping,id=42,seq=5" -q
+	tc_check_packets "dev $h1 egress" 101 10
+	check_err $? "didn't mirred redirect ICMP"
+	tc_check_packets "dev $h1 ingress" 102 10
+	check_err $? "didn't drop mirred ICMP"
+
+	tc filter del dev $h1 egress protocol ip pref 100 handle 100 flower
+	tc filter del dev $h1 egress protocol ip pref 101 handle 101 flower
+	tc filter del dev $h1 ingress protocol ip pref 102 handle 102 flower
+
+	rm -f $mirred_e2i_tf1 $mirred_e2i_tf2
+	log_test "mirred_egress_to_ingress_tcp ($tcflags)"
+}
+
 setup_prepare()
 {
 	h1=${NETIFS[p1]}
@@ -178,6 +269,8 @@ setup_prepare()
 
 cleanup()
 {
+	local tf
+
 	pre_cleanup
 
 	switch_destroy
@@ -188,6 +281,8 @@ cleanup()
 
 	ip link set $swp2 address $swp2origmac
 	ip link set $swp1 address $swp1origmac
+
+	for tf in $mirred_e2i_tf1 $mirred_e2i_tf2; do rm -f $tf; done
 }
 
 mirred_egress_redirect_test()
diff --git a/tools/testing/selftests/net/forwarding/tc_chains.sh b/tools/testing/selftests/net/forwarding/tc_chains.sh
index 2934fb5ed2a2..b95de0463ebd 100755
--- a/tools/testing/selftests/net/forwarding/tc_chains.sh
+++ b/tools/testing/selftests/net/forwarding/tc_chains.sh
@@ -136,7 +136,7 @@ template_filter_fits()
 
 	tc filter add dev $h2 ingress protocol ip pref 1 handle 1102 \
 		flower src_mac $h2mac action drop &> /dev/null
-	check_fail $? "Incorrectly succeded to insert filter which does not template"
+	check_fail $? "Incorrectly succeeded to insert filter which does not template"
 
 	tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
 		flower src_mac $h2mac action drop
@@ -144,7 +144,7 @@ template_filter_fits()
 
 	tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1102 \
 		flower dst_mac $h2mac action drop &> /dev/null
-	check_fail $? "Incorrectly succeded to insert filter which does not template"
+	check_fail $? "Incorrectly succeeded to insert filter which does not template"
 
 	tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1102 \
 		flower &> /dev/null
diff --git a/tools/testing/selftests/net/forwarding/tc_common.sh b/tools/testing/selftests/net/forwarding/tc_common.sh
index 0e18e8be6e2a..bce8bb8d2b6f 100644
--- a/tools/testing/selftests/net/forwarding/tc_common.sh
+++ b/tools/testing/selftests/net/forwarding/tc_common.sh
@@ -16,6 +16,16 @@ tc_check_packets()
 		 tc_rule_handle_stats_get "$id" "$handle" > /dev/null
 }
 
+tc_check_at_least_x_packets()
+{
+	local id=$1
+	local handle=$2
+	local count=$3
+
+	busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $count" \
+		 tc_rule_handle_stats_get "$id" "$handle" > /dev/null
+}
+
 tc_check_packets_hitting()
 {
 	local id=$1
diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh
index 058c746ee300..b1daad19b01e 100755
--- a/tools/testing/selftests/net/forwarding/tc_flower.sh
+++ b/tools/testing/selftests/net/forwarding/tc_flower.sh
@@ -3,7 +3,10 @@
 
 ALL_TESTS="match_dst_mac_test match_src_mac_test match_dst_ip_test \
 	match_src_ip_test match_ip_flags_test match_pcp_test match_vlan_test \
-	match_ip_tos_test match_indev_test"
+	match_ip_tos_test match_indev_test match_ip_ttl_test
+	match_mpls_label_test \
+	match_mpls_tc_test match_mpls_bos_test match_mpls_ttl_test \
+	match_mpls_lse_test"
 NUM_NETIFS=2
 source tc_common.sh
 source lib.sh
@@ -49,8 +52,8 @@ match_dst_mac_test()
 	tc_check_packets "dev $h2 ingress" 101 1
 	check_fail $? "Matched on a wrong filter"
 
-	tc_check_packets "dev $h2 ingress" 102 1
-	check_err $? "Did not match on correct filter"
+	tc_check_packets "dev $h2 ingress" 102 0
+	check_fail $? "Did not match on correct filter"
 
 	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
 	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
@@ -75,8 +78,8 @@ match_src_mac_test()
 	tc_check_packets "dev $h2 ingress" 101 1
 	check_fail $? "Matched on a wrong filter"
 
-	tc_check_packets "dev $h2 ingress" 102 1
-	check_err $? "Did not match on correct filter"
+	tc_check_packets "dev $h2 ingress" 102 0
+	check_fail $? "Did not match on correct filter"
 
 	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
 	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
@@ -310,6 +313,42 @@ match_ip_tos_test()
 	log_test "ip_tos match ($tcflags)"
 }
 
+match_ip_ttl_test()
+{
+	RET=0
+
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags dst_ip 192.0.2.2 ip_ttl 63 action drop
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip "ttl=63" -q
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip "ttl=63,mf,frag=256" -q
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_fail $? "Matched on the wrong filter (no check on ttl)"
+
+	tc_check_packets "dev $h2 ingress" 101 2
+	check_err $? "Did not match on correct filter (ttl=63)"
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip "ttl=255" -q
+
+	tc_check_packets "dev $h2 ingress" 101 3
+	check_fail $? "Matched on a wrong filter (ttl=63)"
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Did not match on correct filter (no check on ttl)"
+
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+	log_test "ip_ttl match ($tcflags)"
+}
+
 match_indev_test()
 {
 	RET=0
@@ -334,6 +373,309 @@ match_indev_test()
 	log_test "indev match ($tcflags)"
 }
 
+# Unfortunately, mausezahn can't build MPLS headers when used in L2
+# mode, so we have this function to build Label Stack Entries.
+mpls_lse()
+{
+	local label=$1
+	local tc=$2
+	local bos=$3
+	local ttl=$4
+
+	printf "%02x %02x %02x %02x"                        \
+		$((label >> 12))                            \
+		$((label >> 4 & 0xff))                      \
+		$((((label & 0xf) << 4) + (tc << 1) + bos)) \
+		$ttl
+}
+
+match_mpls_label_test()
+{
+	local ethtype="88 47"; readonly ethtype
+	local pkt
+
+	RET=0
+
+	check_tc_mpls_support $h2 || return 0
+
+	tc filter add dev $h2 ingress protocol mpls_uc pref 1 handle 101 \
+		flower $tcflags mpls_label 0 action drop
+	tc filter add dev $h2 ingress protocol mpls_uc pref 2 handle 102 \
+		flower $tcflags mpls_label 1048575 action drop
+
+	pkt="$ethtype $(mpls_lse 1048575 0 1 255)"
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_fail $? "Matched on a wrong filter (1048575)"
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Did not match on correct filter (1048575)"
+
+	pkt="$ethtype $(mpls_lse 0 0 1 255)"
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+	tc_check_packets "dev $h2 ingress" 102 2
+	check_fail $? "Matched on a wrong filter (0)"
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_err $? "Did not match on correct filter (0)"
+
+	tc filter del dev $h2 ingress protocol mpls_uc pref 2 handle 102 flower
+	tc filter del dev $h2 ingress protocol mpls_uc pref 1 handle 101 flower
+
+	log_test "mpls_label match ($tcflags)"
+}
+
+match_mpls_tc_test()
+{
+	local ethtype="88 47"; readonly ethtype
+	local pkt
+
+	RET=0
+
+	check_tc_mpls_support $h2 || return 0
+
+	tc filter add dev $h2 ingress protocol mpls_uc pref 1 handle 101 \
+		flower $tcflags mpls_tc 0 action drop
+	tc filter add dev $h2 ingress protocol mpls_uc pref 2 handle 102 \
+		flower $tcflags mpls_tc 7 action drop
+
+	pkt="$ethtype $(mpls_lse 0 7 1 255)"
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_fail $? "Matched on a wrong filter (7)"
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Did not match on correct filter (7)"
+
+	pkt="$ethtype $(mpls_lse 0 0 1 255)"
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+	tc_check_packets "dev $h2 ingress" 102 2
+	check_fail $? "Matched on a wrong filter (0)"
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_err $? "Did not match on correct filter (0)"
+
+	tc filter del dev $h2 ingress protocol mpls_uc pref 2 handle 102 flower
+	tc filter del dev $h2 ingress protocol mpls_uc pref 1 handle 101 flower
+
+	log_test "mpls_tc match ($tcflags)"
+}
+
+match_mpls_bos_test()
+{
+	local ethtype="88 47"; readonly ethtype
+	local pkt
+
+	RET=0
+
+	check_tc_mpls_support $h2 || return 0
+
+	tc filter add dev $h2 ingress protocol mpls_uc pref 1 handle 101 \
+		flower $tcflags mpls_bos 0 action drop
+	tc filter add dev $h2 ingress protocol mpls_uc pref 2 handle 102 \
+		flower $tcflags mpls_bos 1 action drop
+
+	pkt="$ethtype $(mpls_lse 0 0 1 255)"
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_fail $? "Matched on a wrong filter (1)"
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Did not match on correct filter (1)"
+
+	# Need to add a second label to properly mark the Bottom of Stack
+	pkt="$ethtype $(mpls_lse 0 0 0 255) $(mpls_lse 0 0 1 255)"
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+	tc_check_packets "dev $h2 ingress" 102 2
+	check_fail $? "Matched on a wrong filter (0)"
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_err $? "Did not match on correct filter (0)"
+
+	tc filter del dev $h2 ingress protocol mpls_uc pref 2 handle 102 flower
+	tc filter del dev $h2 ingress protocol mpls_uc pref 1 handle 101 flower
+
+	log_test "mpls_bos match ($tcflags)"
+}
+
+match_mpls_ttl_test()
+{
+	local ethtype="88 47"; readonly ethtype
+	local pkt
+
+	RET=0
+
+	check_tc_mpls_support $h2 || return 0
+
+	tc filter add dev $h2 ingress protocol mpls_uc pref 1 handle 101 \
+		flower $tcflags mpls_ttl 0 action drop
+	tc filter add dev $h2 ingress protocol mpls_uc pref 2 handle 102 \
+		flower $tcflags mpls_ttl 255 action drop
+
+	pkt="$ethtype $(mpls_lse 0 0 1 255)"
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_fail $? "Matched on a wrong filter (255)"
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Did not match on correct filter (255)"
+
+	pkt="$ethtype $(mpls_lse 0 0 1 0)"
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+	tc_check_packets "dev $h2 ingress" 102 2
+	check_fail $? "Matched on a wrong filter (0)"
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_err $? "Did not match on correct filter (0)"
+
+	tc filter del dev $h2 ingress protocol mpls_uc pref 2 handle 102 flower
+	tc filter del dev $h2 ingress protocol mpls_uc pref 1 handle 101 flower
+
+	log_test "mpls_ttl match ($tcflags)"
+}
+
+match_mpls_lse_test()
+{
+	local ethtype="88 47"; readonly ethtype
+	local pkt
+
+	RET=0
+
+	check_tc_mpls_lse_stats $h2 || return 0
+
+	# Match on first LSE (minimal values for each field)
+	tc filter add dev $h2 ingress protocol mpls_uc pref 1 handle 101 \
+		flower $tcflags mpls lse depth 1 label 0 action continue
+	tc filter add dev $h2 ingress protocol mpls_uc pref 2 handle 102 \
+		flower $tcflags mpls lse depth 1 tc 0 action continue
+	tc filter add dev $h2 ingress protocol mpls_uc pref 3 handle 103 \
+		flower $tcflags mpls lse depth 1 bos 0 action continue
+	tc filter add dev $h2 ingress protocol mpls_uc pref 4 handle 104 \
+		flower $tcflags mpls lse depth 1 ttl 0 action continue
+
+	# Match on second LSE (maximal values for each field)
+	tc filter add dev $h2 ingress protocol mpls_uc pref 5 handle 105 \
+		flower $tcflags mpls lse depth 2 label 1048575 action continue
+	tc filter add dev $h2 ingress protocol mpls_uc pref 6 handle 106 \
+		flower $tcflags mpls lse depth 2 tc 7 action continue
+	tc filter add dev $h2 ingress protocol mpls_uc pref 7 handle 107 \
+		flower $tcflags mpls lse depth 2 bos 1 action continue
+	tc filter add dev $h2 ingress protocol mpls_uc pref 8 handle 108 \
+		flower $tcflags mpls lse depth 2 ttl 255 action continue
+
+	# Match on LSE depth
+	tc filter add dev $h2 ingress protocol mpls_uc pref 9 handle 109 \
+		flower $tcflags mpls lse depth 1 action continue
+	tc filter add dev $h2 ingress protocol mpls_uc pref 10 handle 110 \
+		flower $tcflags mpls lse depth 2 action continue
+	tc filter add dev $h2 ingress protocol mpls_uc pref 11 handle 111 \
+		flower $tcflags mpls lse depth 3 action continue
+
+	# Base packet, matched by all filters (except for stack depth 3)
+	pkt="$ethtype $(mpls_lse 0 0 0 0) $(mpls_lse 1048575 7 1 255)"
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+	# Make a variant of the above packet, with a non-matching value
+	# for each LSE field
+
+	# Wrong label at depth 1
+	pkt="$ethtype $(mpls_lse 1 0 0 0) $(mpls_lse 1048575 7 1 255)"
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+	# Wrong TC at depth 1
+	pkt="$ethtype $(mpls_lse 0 1 0 0) $(mpls_lse 1048575 7 1 255)"
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+	# Wrong BOS at depth 1 (not adding a second LSE here since BOS is set
+	# in the first label, so anything that'd follow wouldn't be considered)
+	pkt="$ethtype $(mpls_lse 0 0 1 0)"
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+	# Wrong TTL at depth 1
+	pkt="$ethtype $(mpls_lse 0 0 0 1) $(mpls_lse 1048575 7 1 255)"
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+	# Wrong label at depth 2
+	pkt="$ethtype $(mpls_lse 0 0 0 0) $(mpls_lse 1048574 7 1 255)"
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+	# Wrong TC at depth 2
+	pkt="$ethtype $(mpls_lse 0 0 0 0) $(mpls_lse 1048575 6 1 255)"
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+	# Wrong BOS at depth 2 (adding a third LSE here since BOS isn't set in
+	# the second label)
+	pkt="$ethtype $(mpls_lse 0 0 0 0) $(mpls_lse 1048575 7 0 255)"
+	pkt="$pkt $(mpls_lse 0 0 1 255)"
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+	# Wrong TTL at depth 2
+	pkt="$ethtype $(mpls_lse 0 0 0 0) $(mpls_lse 1048575 7 1 254)"
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+	# Filters working at depth 1 should match all packets but one
+
+	tc_check_packets "dev $h2 ingress" 101 8
+	check_err $? "Did not match on correct filter"
+
+	tc_check_packets "dev $h2 ingress" 102 8
+	check_err $? "Did not match on correct filter"
+
+	tc_check_packets "dev $h2 ingress" 103 8
+	check_err $? "Did not match on correct filter"
+
+	tc_check_packets "dev $h2 ingress" 104 8
+	check_err $? "Did not match on correct filter"
+
+	# Filters working at depth 2 should match all packets but two (because
+	# of the test packet where the label stack depth is just one)
+
+	tc_check_packets "dev $h2 ingress" 105 7
+	check_err $? "Did not match on correct filter"
+
+	tc_check_packets "dev $h2 ingress" 106 7
+	check_err $? "Did not match on correct filter"
+
+	tc_check_packets "dev $h2 ingress" 107 7
+	check_err $? "Did not match on correct filter"
+
+	tc_check_packets "dev $h2 ingress" 108 7
+	check_err $? "Did not match on correct filter"
+
+	# Finally, verify the filters that only match on LSE depth
+
+	tc_check_packets "dev $h2 ingress" 109 9
+	check_err $? "Did not match on correct filter"
+
+	tc_check_packets "dev $h2 ingress" 110 8
+	check_err $? "Did not match on correct filter"
+
+	tc_check_packets "dev $h2 ingress" 111 1
+	check_err $? "Did not match on correct filter"
+
+	tc filter del dev $h2 ingress protocol mpls_uc pref 11 handle 111 flower
+	tc filter del dev $h2 ingress protocol mpls_uc pref 10 handle 110 flower
+	tc filter del dev $h2 ingress protocol mpls_uc pref 9 handle 109 flower
+	tc filter del dev $h2 ingress protocol mpls_uc pref 8 handle 108 flower
+	tc filter del dev $h2 ingress protocol mpls_uc pref 7 handle 107 flower
+	tc filter del dev $h2 ingress protocol mpls_uc pref 6 handle 106 flower
+	tc filter del dev $h2 ingress protocol mpls_uc pref 5 handle 105 flower
+	tc filter del dev $h2 ingress protocol mpls_uc pref 4 handle 104 flower
+	tc filter del dev $h2 ingress protocol mpls_uc pref 3 handle 103 flower
+	tc filter del dev $h2 ingress protocol mpls_uc pref 2 handle 102 flower
+	tc filter del dev $h2 ingress protocol mpls_uc pref 1 handle 101 flower
+
+	log_test "mpls lse match ($tcflags)"
+}
+
 setup_prepare()
 {
 	h1=${NETIFS[p1]}
diff --git a/tools/testing/selftests/net/forwarding/tc_flower_cfm.sh b/tools/testing/selftests/net/forwarding/tc_flower_cfm.sh
new file mode 100755
index 000000000000..3ca20df952eb
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_flower_cfm.sh
@@ -0,0 +1,206 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="match_cfm_opcode match_cfm_level match_cfm_level_and_opcode"
+NUM_NETIFS=2
+source tc_common.sh
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1
+}
+
+h2_create()
+{
+	simple_if_init $h2
+	tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+	tc qdisc del dev $h2 clsact
+	simple_if_fini $h2
+}
+
+u8_to_hex()
+{
+	local u8=$1; shift
+
+	printf "%02x" $u8
+}
+
+generate_cfm_hdr()
+{
+	local mdl=$1; shift
+	local op=$1; shift
+	local flags=$1; shift
+	local tlv_offset=$1; shift
+
+	local cfm_hdr=$(:
+	               )"$(u8_to_hex $((mdl << 5))):"$( 	: MD level and Version
+	               )"$(u8_to_hex $op):"$(			: OpCode
+	               )"$(u8_to_hex $flags):"$(		: Flags
+	               )"$(u8_to_hex $tlv_offset)"$(		: TLV offset
+	               )
+
+	echo $cfm_hdr
+}
+
+match_cfm_opcode()
+{
+	local ethtype="89 02"; readonly ethtype
+	RET=0
+
+	tc filter add dev $h2 ingress protocol cfm pref 1 handle 101 \
+	   flower cfm op 47 action drop
+	tc filter add dev $h2 ingress protocol cfm pref 1 handle 102 \
+	   flower cfm op 43 action drop
+
+	pkt="$ethtype $(generate_cfm_hdr 7 47 0 32)"
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+	pkt="$ethtype $(generate_cfm_hdr 6 5 0 4)"
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_err $? "Did not match on correct opcode"
+
+	tc_check_packets "dev $h2 ingress" 102 0
+	check_err $? "Matched on the wrong opcode"
+
+	pkt="$ethtype $(generate_cfm_hdr 0 43 0 12)"
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_err $? "Matched on the wrong opcode"
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Did not match on correct opcode"
+
+	tc filter del dev $h2 ingress protocol cfm pref 1 handle 101 flower
+	tc filter del dev $h2 ingress protocol cfm pref 1 handle 102 flower
+
+	log_test "CFM opcode match test"
+}
+
+match_cfm_level()
+{
+	local ethtype="89 02"; readonly ethtype
+	RET=0
+
+	tc filter add dev $h2 ingress protocol cfm pref 1 handle 101 \
+	   flower cfm mdl 5 action drop
+	tc filter add dev $h2 ingress protocol cfm pref 1 handle 102 \
+	   flower cfm mdl 3 action drop
+	tc filter add dev $h2 ingress protocol cfm pref 1 handle 103 \
+	   flower cfm mdl 0 action drop
+
+	pkt="$ethtype $(generate_cfm_hdr 5 42 0 12)"
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+	pkt="$ethtype $(generate_cfm_hdr 6 1 0 70)"
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+	pkt="$ethtype $(generate_cfm_hdr 0 1 0 70)"
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_err $? "Did not match on correct level"
+
+	tc_check_packets "dev $h2 ingress" 102 0
+	check_err $? "Matched on the wrong level"
+
+	tc_check_packets "dev $h2 ingress" 103 1
+	check_err $? "Did not match on correct level"
+
+	pkt="$ethtype $(generate_cfm_hdr 3 0 0 4)"
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_err $? "Matched on the wrong level"
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Did not match on correct level"
+
+	tc_check_packets "dev $h2 ingress" 103 1
+	check_err $? "Matched on the wrong level"
+
+	tc filter del dev $h2 ingress protocol cfm pref 1 handle 101 flower
+	tc filter del dev $h2 ingress protocol cfm pref 1 handle 102 flower
+	tc filter del dev $h2 ingress protocol cfm pref 1 handle 103 flower
+
+	log_test "CFM level match test"
+}
+
+match_cfm_level_and_opcode()
+{
+	local ethtype="89 02"; readonly ethtype
+	RET=0
+
+	tc filter add dev $h2 ingress protocol cfm pref 1 handle 101 \
+	   flower cfm mdl 5 op 41 action drop
+	tc filter add dev $h2 ingress protocol cfm pref 1 handle 102 \
+	   flower cfm mdl 7 op 42 action drop
+
+	pkt="$ethtype $(generate_cfm_hdr 5 41 0 4)"
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+	pkt="$ethtype $(generate_cfm_hdr 7 3 0 4)"
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+	pkt="$ethtype $(generate_cfm_hdr 3 42 0 12)"
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_err $? "Did not match on correct level and opcode"
+
+	tc_check_packets "dev $h2 ingress" 102 0
+	check_err $? "Matched on the wrong level and opcode"
+
+	pkt="$ethtype $(generate_cfm_hdr 7 42 0 12)"
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_err $? "Matched on the wrong level and opcode"
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Did not match on correct level and opcode"
+
+	tc filter del dev $h2 ingress protocol cfm pref 1 handle 101 flower
+	tc filter del dev $h2 ingress protocol cfm pref 1 handle 102 flower
+
+	log_test "CFM opcode and level match test"
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	h2=${NETIFS[p2]}
+	h1mac=$(mac_get $h1)
+	h2mac=$(mac_get $h2)
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh b/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh
new file mode 100755
index 000000000000..c2420bb72c12
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh
@@ -0,0 +1,357 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------+                             +----------------------+
+# | H1 (vrf)              |                             | H2 (vrf)             |
+# |    + $h1              |                             |              $h2 +   |
+# |    | 192.0.2.1/28     |                             |     192.0.2.2/28 |   |
+# |    | 2001:db8:1::1/64 |                             | 2001:db8:1::2/64 |   |
+# +----|------------------+                             +------------------|---+
+#      |                                                                   |
+# +----|-------------------------------------------------------------------|---+
+# | SW |                                                                   |   |
+# |  +-|-------------------------------------------------------------------|-+ |
+# |  | + $swp1                       BR                              $swp2 + | |
+# |  +-----------------------------------------------------------------------+ |
+# +----------------------------------------------------------------------------+
+
+ALL_TESTS="
+	test_l2_miss_unicast
+	test_l2_miss_multicast
+	test_l2_miss_ll_multicast
+	test_l2_miss_broadcast
+"
+
+NUM_NETIFS=4
+source lib.sh
+source tc_common.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.2/28 2001:db8:1::2/64
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2 192.0.2.2/28 2001:db8:1::2/64
+}
+
+switch_create()
+{
+	ip link add name br1 up type bridge
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+	ip link set dev $swp2 master br1
+	ip link set dev $swp2 up
+
+	tc qdisc add dev $swp2 clsact
+}
+
+switch_destroy()
+{
+	tc qdisc del dev $swp2 clsact
+
+	ip link set dev $swp2 down
+	ip link set dev $swp2 nomaster
+	ip link set dev $swp1 down
+	ip link set dev $swp1 nomaster
+	ip link del dev br1
+}
+
+test_l2_miss_unicast()
+{
+	local dmac=00:01:02:03:04:05
+	local dip=192.0.2.2
+	local sip=192.0.2.1
+
+	RET=0
+
+	# Unknown unicast.
+	tc filter add dev $swp2 egress protocol ipv4 handle 101 pref 1 \
+	   flower indev $swp1 l2_miss 1 dst_mac $dmac src_ip $sip \
+	   dst_ip $dip action pass
+	# Known unicast.
+	tc filter add dev $swp2 egress protocol ipv4 handle 102 pref 1 \
+	   flower indev $swp1 l2_miss 0 dst_mac $dmac src_ip $sip \
+	   dst_ip $dip action pass
+
+	# Before adding FDB entry.
+	$MZ $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q
+
+	tc_check_packets "dev $swp2 egress" 101 1
+	check_err $? "Unknown unicast filter was not hit before adding FDB entry"
+
+	tc_check_packets "dev $swp2 egress" 102 0
+	check_err $? "Known unicast filter was hit before adding FDB entry"
+
+	# Adding FDB entry.
+	bridge fdb replace $dmac dev $swp2 master static
+
+	$MZ $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q
+
+	tc_check_packets "dev $swp2 egress" 101 1
+	check_err $? "Unknown unicast filter was hit after adding FDB entry"
+
+	tc_check_packets "dev $swp2 egress" 102 1
+	check_err $? "Known unicast filter was not hit after adding FDB entry"
+
+	# Deleting FDB entry.
+	bridge fdb del $dmac dev $swp2 master static
+
+	$MZ $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q
+
+	tc_check_packets "dev $swp2 egress" 101 2
+	check_err $? "Unknown unicast filter was not hit after deleting FDB entry"
+
+	tc_check_packets "dev $swp2 egress" 102 1
+	check_err $? "Known unicast filter was hit after deleting FDB entry"
+
+	tc filter del dev $swp2 egress protocol ipv4 pref 1 handle 102 flower
+	tc filter del dev $swp2 egress protocol ipv4 pref 1 handle 101 flower
+
+	log_test "L2 miss - Unicast"
+}
+
+test_l2_miss_multicast_common()
+{
+	local proto=$1; shift
+	local sip=$1; shift
+	local dip=$1; shift
+	local dmac=$1; shift
+	local mode=$1; shift
+	local name=$1; shift
+
+	RET=0
+
+	# Unregistered multicast.
+	tc filter add dev $swp2 egress protocol $proto handle 101 pref 1 \
+	   flower indev $swp1 l2_miss 1 src_ip $sip dst_ip $dip \
+	   action pass
+	# Registered multicast.
+	tc filter add dev $swp2 egress protocol $proto handle 102 pref 1 \
+	   flower indev $swp1 l2_miss 0 src_ip $sip dst_ip $dip \
+	   action pass
+
+	# Before adding MDB entry.
+	$MZ $mode $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q
+
+	tc_check_packets "dev $swp2 egress" 101 1
+	check_err $? "Unregistered multicast filter was not hit before adding MDB entry"
+
+	tc_check_packets "dev $swp2 egress" 102 0
+	check_err $? "Registered multicast filter was hit before adding MDB entry"
+
+	# Adding MDB entry.
+	bridge mdb replace dev br1 port $swp2 grp $dip permanent
+
+	$MZ $mode $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q
+
+	tc_check_packets "dev $swp2 egress" 101 1
+	check_err $? "Unregistered multicast filter was hit after adding MDB entry"
+
+	tc_check_packets "dev $swp2 egress" 102 1
+	check_err $? "Registered multicast filter was not hit after adding MDB entry"
+
+	# Deleting MDB entry.
+	bridge mdb del dev br1 port $swp2 grp $dip
+
+	$MZ $mode $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q
+
+	tc_check_packets "dev $swp2 egress" 101 2
+	check_err $? "Unregistered multicast filter was not hit after deleting MDB entry"
+
+	tc_check_packets "dev $swp2 egress" 102 1
+	check_err $? "Registered multicast filter was hit after deleting MDB entry"
+
+	tc filter del dev $swp2 egress protocol $proto pref 1 handle 102 flower
+	tc filter del dev $swp2 egress protocol $proto pref 1 handle 101 flower
+
+	log_test "L2 miss - Multicast ($name)"
+}
+
+test_l2_miss_multicast_ipv4()
+{
+	local proto="ipv4"
+	local sip=192.0.2.1
+	local dip=239.1.1.1
+	local dmac=01:00:5e:01:01:01
+	local mode="-4"
+	local name="IPv4"
+
+	test_l2_miss_multicast_common $proto $sip $dip $dmac $mode $name
+}
+
+test_l2_miss_multicast_ipv6()
+{
+	local proto="ipv6"
+	local sip=2001:db8:1::1
+	local dip=ff0e::1
+	local dmac=33:33:00:00:00:01
+	local mode="-6"
+	local name="IPv6"
+
+	test_l2_miss_multicast_common $proto $sip $dip $dmac $mode $name
+}
+
+test_l2_miss_multicast()
+{
+	# Configure $swp2 as a multicast router port so that it will forward
+	# both registered and unregistered multicast traffic.
+	bridge link set dev $swp2 mcast_router 2
+
+	# Set the Max Response Delay to 100 centiseconds (1 second) so that the
+	# bridge will start forwarding according to its MDB soon after a
+	# multicast querier is enabled.
+	ip link set dev br1 type bridge mcast_query_response_interval 100
+
+	# Forwarding according to MDB entries only takes place when the bridge
+	# detects that there is a valid querier in the network. Set the bridge
+	# as the querier and assign it a valid IPv6 link-local address to be
+	# used as the source address for MLD queries.
+	ip link set dev br1 type bridge mcast_querier 1
+	ip -6 address add fe80::1/64 nodad dev br1
+	sleep 10
+
+	test_l2_miss_multicast_ipv4
+	test_l2_miss_multicast_ipv6
+
+	ip -6 address del fe80::1/64 dev br1
+	ip link set dev br1 type bridge mcast_querier 0
+	ip link set dev br1 type bridge mcast_query_response_interval 1000
+	bridge link set dev $swp2 mcast_router 1
+}
+
+test_l2_miss_multicast_common2()
+{
+	local name=$1; shift
+	local dmac=$1; shift
+	local dip=224.0.0.1
+	local sip=192.0.2.1
+
+}
+
+test_l2_miss_ll_multicast_common()
+{
+	local proto=$1; shift
+	local dmac=$1; shift
+	local sip=$1; shift
+	local dip=$1; shift
+	local mode=$1; shift
+	local name=$1; shift
+
+	RET=0
+
+	tc filter add dev $swp2 egress protocol $proto handle 101 pref 1 \
+	   flower indev $swp1 l2_miss 1 dst_mac $dmac src_ip $sip \
+	   dst_ip $dip action pass
+
+	$MZ $mode $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q
+
+	tc_check_packets "dev $swp2 egress" 101 1
+	check_err $? "Filter was not hit"
+
+	tc filter del dev $swp2 egress protocol $proto pref 1 handle 101 flower
+
+	log_test "L2 miss - Link-local multicast ($name)"
+}
+
+test_l2_miss_ll_multicast_ipv4()
+{
+	local proto=ipv4
+	local dmac=01:00:5e:00:00:01
+	local sip=192.0.2.1
+	local dip=224.0.0.1
+	local mode="-4"
+	local name="IPv4"
+
+	test_l2_miss_ll_multicast_common $proto $dmac $sip $dip $mode $name
+}
+
+test_l2_miss_ll_multicast_ipv6()
+{
+	local proto=ipv6
+	local dmac=33:33:00:00:00:01
+	local sip=2001:db8:1::1
+	local dip=ff02::1
+	local mode="-6"
+	local name="IPv6"
+
+	test_l2_miss_ll_multicast_common $proto $dmac $sip $dip $mode $name
+}
+
+test_l2_miss_ll_multicast()
+{
+	test_l2_miss_ll_multicast_ipv4
+	test_l2_miss_ll_multicast_ipv6
+}
+
+test_l2_miss_broadcast()
+{
+	local dmac=ff:ff:ff:ff:ff:ff
+	local smac=00:01:02:03:04:05
+
+	RET=0
+
+	tc filter add dev $swp2 egress protocol all handle 101 pref 1 \
+	   flower l2_miss 1 dst_mac $dmac src_mac $smac \
+	   action pass
+	tc filter add dev $swp2 egress protocol all handle 102 pref 1 \
+	   flower l2_miss 0 dst_mac $dmac src_mac $smac \
+	   action pass
+
+	$MZ $h1 -a $smac -b $dmac -c 1 -p 100 -q
+
+	tc_check_packets "dev $swp2 egress" 101 0
+	check_err $? "L2 miss filter was hit when should not"
+
+	tc_check_packets "dev $swp2 egress" 102 1
+	check_err $? "L2 no miss filter was not hit when should"
+
+	tc filter del dev $swp2 egress protocol all pref 1 handle 102 flower
+	tc filter del dev $swp2 egress protocol all pref 1 handle 101 flower
+
+	log_test "L2 miss - Broadcast"
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+	h1_create
+	h2_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+	h2_destroy
+	h1_destroy
+	vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh b/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh
new file mode 100755
index 000000000000..3885a2a91f7d
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh
@@ -0,0 +1,228 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------+                             +----------------------+
+# | H1 (vrf)              |                             | H2 (vrf)             |
+# |    + $h1              |                             |              $h2 +   |
+# |    | 192.0.2.1/28     |                             |     192.0.2.2/28 |   |
+# |    | 2001:db8:1::1/64 |                             | 2001:db8:1::2/64 |   |
+# +----|------------------+                             +------------------|---+
+#      |                                                                   |
+# +----|-------------------------------------------------------------------|---+
+# | SW |                                                                   |   |
+# |  +-|-------------------------------------------------------------------|-+ |
+# |  | + $swp1                       BR                              $swp2 + | |
+# |  +-----------------------------------------------------------------------+ |
+# +----------------------------------------------------------------------------+
+
+ALL_TESTS="
+	test_port_range_ipv4_udp
+	test_port_range_ipv4_tcp
+	test_port_range_ipv6_udp
+	test_port_range_ipv6_tcp
+"
+
+NUM_NETIFS=4
+source lib.sh
+source tc_common.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.2/28 2001:db8:1::2/64
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2 192.0.2.2/28 2001:db8:1::2/64
+}
+
+switch_create()
+{
+	ip link add name br1 type bridge
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+	ip link set dev $swp2 master br1
+	ip link set dev $swp2 up
+	ip link set dev br1 up
+
+	tc qdisc add dev $swp1 clsact
+	tc qdisc add dev $swp2 clsact
+}
+
+switch_destroy()
+{
+	tc qdisc del dev $swp2 clsact
+	tc qdisc del dev $swp1 clsact
+
+	ip link set dev br1 down
+	ip link set dev $swp2 down
+	ip link set dev $swp2 nomaster
+	ip link set dev $swp1 down
+	ip link set dev $swp1 nomaster
+	ip link del dev br1
+}
+
+__test_port_range()
+{
+	local proto=$1; shift
+	local ip_proto=$1; shift
+	local sip=$1; shift
+	local dip=$1; shift
+	local mode=$1; shift
+	local name=$1; shift
+	local dmac=$(mac_get $h2)
+	local smac=$(mac_get $h1)
+	local sport_min=100
+	local sport_max=200
+	local sport_mid=$((sport_min + (sport_max - sport_min) / 2))
+	local dport_min=300
+	local dport_max=400
+	local dport_mid=$((dport_min + (dport_max - dport_min) / 2))
+
+	RET=0
+
+	tc filter add dev $swp1 ingress protocol $proto handle 101 pref 1 \
+		flower src_ip $sip dst_ip $dip ip_proto $ip_proto \
+		src_port $sport_min-$sport_max \
+		dst_port $dport_min-$dport_max \
+		action pass
+	tc filter add dev $swp2 egress protocol $proto handle 101 pref 1 \
+		flower src_ip $sip dst_ip $dip ip_proto $ip_proto \
+		src_port $sport_min-$sport_max \
+		dst_port $dport_min-$dport_max \
+		action drop
+
+	$MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+		-t $ip_proto "sp=$sport_min,dp=$dport_min"
+	tc_check_packets "dev $swp1 ingress" 101 1
+	check_err $? "Ingress filter not hit with minimum ports"
+	tc_check_packets "dev $swp2 egress" 101 1
+	check_err $? "Egress filter not hit with minimum ports"
+
+	$MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+		-t $ip_proto "sp=$sport_mid,dp=$dport_mid"
+	tc_check_packets "dev $swp1 ingress" 101 2
+	check_err $? "Ingress filter not hit with middle ports"
+	tc_check_packets "dev $swp2 egress" 101 2
+	check_err $? "Egress filter not hit with middle ports"
+
+	$MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+		-t $ip_proto "sp=$sport_max,dp=$dport_max"
+	tc_check_packets "dev $swp1 ingress" 101 3
+	check_err $? "Ingress filter not hit with maximum ports"
+	tc_check_packets "dev $swp2 egress" 101 3
+	check_err $? "Egress filter not hit with maximum ports"
+
+	# Send traffic when both ports are out of range and when only one port
+	# is out of range.
+	$MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+		-t $ip_proto "sp=$((sport_min - 1)),dp=$dport_min"
+	$MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+		-t $ip_proto "sp=$((sport_max + 1)),dp=$dport_min"
+	$MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+		-t $ip_proto "sp=$sport_min,dp=$((dport_min - 1))"
+	$MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+		-t $ip_proto "sp=$sport_min,dp=$((dport_max + 1))"
+	$MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+		-t $ip_proto "sp=$((sport_max + 1)),dp=$((dport_max + 1))"
+	tc_check_packets "dev $swp1 ingress" 101 3
+	check_err $? "Ingress filter was hit when should not"
+	tc_check_packets "dev $swp2 egress" 101 3
+	check_err $? "Egress filter was hit when should not"
+
+	tc filter del dev $swp2 egress protocol $proto pref 1 handle 101 flower
+	tc filter del dev $swp1 ingress protocol $proto pref 1 handle 101 flower
+
+	log_test "Port range matching - $name"
+}
+
+test_port_range_ipv4_udp()
+{
+	local proto=ipv4
+	local ip_proto=udp
+	local sip=192.0.2.1
+	local dip=192.0.2.2
+	local mode="-4"
+	local name="IPv4 UDP"
+
+	__test_port_range $proto $ip_proto $sip $dip $mode "$name"
+}
+
+test_port_range_ipv4_tcp()
+{
+	local proto=ipv4
+	local ip_proto=tcp
+	local sip=192.0.2.1
+	local dip=192.0.2.2
+	local mode="-4"
+	local name="IPv4 TCP"
+
+	__test_port_range $proto $ip_proto $sip $dip $mode "$name"
+}
+
+test_port_range_ipv6_udp()
+{
+	local proto=ipv6
+	local ip_proto=udp
+	local sip=2001:db8:1::1
+	local dip=2001:db8:1::2
+	local mode="-6"
+	local name="IPv6 UDP"
+
+	__test_port_range $proto $ip_proto $sip $dip $mode "$name"
+}
+
+test_port_range_ipv6_tcp()
+{
+	local proto=ipv6
+	local ip_proto=tcp
+	local sip=2001:db8:1::1
+	local dip=2001:db8:1::2
+	local mode="-6"
+	local name="IPv6 TCP"
+
+	__test_port_range $proto $ip_proto $sip $dip $mode "$name"
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+	h1_create
+	h2_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+	h2_destroy
+	h1_destroy
+	vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_mpls_l2vpn.sh b/tools/testing/selftests/net/forwarding/tc_mpls_l2vpn.sh
new file mode 100755
index 000000000000..03743f04e178
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_mpls_l2vpn.sh
@@ -0,0 +1,192 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------+
+# | H1 (v$h1)             |
+# | 192.0.2.1/24          |
+# | 2001:db8::1/124       |
+# |                 + $h1 |
+# +-----------------|-----+
+#                   |
+#                   | (Plain Ethernet traffic)
+#                   |
+# +-----------------|-----------------------------------------+
+# | LER1            + $edge1                                  |
+# |                     -ingress:                             |
+# |                       -encapsulate Ethernet into MPLS     |
+# |                       -add outer Ethernet header          |
+# |                       -redirect to $mpls1 (egress)        |
+# |                                                           |
+# |                 + $mpls1                                  |
+# |                 |   -ingress:                             |
+# |                 |     -remove outer Ethernet header       |
+# |                 |     -remove MPLS header                 |
+# |                 |     -redirect to $edge1 (egress)        |
+# +-----------------|-----------------------------------------+
+#                   |
+#                   | (Ethernet over MPLS traffic)
+#                   |
+# +-----------------|-----------------------------------------+
+# | LER2            + $mpls2                                  |
+# |                     -ingress:                             |
+# |                       -remove outer Ethernet header       |
+# |                       -remove MPLS header                 |
+# |                       -redirect to $edge2 (egress)        |
+# |                                                           |
+# |                 + $edge2                                  |
+# |                 |   -ingress:                             |
+# |                 |     -encapsulate Ethernet into MPLS     |
+# |                 |     -add outer Ethernet header          |
+# |                 |     -redirect to $mpls2 (egress)        |
+# +-----------------|-----------------------------------------|
+#                   |
+#                   | (Plain Ethernet traffic)
+#                   |
+# +-----------------|-----+
+# | H2 (v$h2)       |     |
+# |                 + $h2 |
+# | 192.0.2.2/24          |
+# | 2001:db8::2/124       |
+# +-----------------------+
+#
+# LER1 and LER2 logically represent two different routers. However, no VRF is
+# created for them, as they don't do any IP routing.
+
+ALL_TESTS="mpls_forward_eth"
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24 2001:db8::1/124
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/24 2001:db8::1/124
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.2/24 2001:db8::2/124
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2 192.0.2.2/24 2001:db8::2/124
+}
+
+ler1_create()
+{
+	tc qdisc add dev $edge1 ingress
+	tc filter add dev $edge1 ingress                            \
+	   matchall                                                 \
+	   action mpls mac_push label 102                           \
+	   action vlan push_eth dst_mac $mpls2mac src_mac $mpls1mac \
+	   action mirred egress redirect dev $mpls1
+	ip link set dev $edge1 up
+
+	tc qdisc add dev $mpls1 ingress
+	tc filter add dev $mpls1 ingress            \
+	   protocol mpls_uc                         \
+	   flower mpls_label 101                    \
+	   action vlan pop_eth                      \
+	   action mpls pop protocol teb             \
+	   action mirred egress redirect dev $edge1
+	ip link set dev $mpls1 up
+}
+
+ler1_destroy()
+{
+	ip link set dev $mpls1 down
+	tc qdisc del dev $mpls1 ingress
+
+	ip link set dev $edge1 down
+	tc qdisc del dev $edge1 ingress
+}
+
+ler2_create()
+{
+	tc qdisc add dev $edge2 ingress
+	tc filter add dev $edge2 ingress                            \
+	   matchall                                                 \
+	   action mpls mac_push label 101                           \
+	   action vlan push_eth dst_mac $mpls1mac src_mac $mpls2mac \
+	   action mirred egress redirect dev $mpls2
+	ip link set dev $edge2 up
+
+	tc qdisc add dev $mpls2 ingress
+	tc filter add dev $mpls2 ingress            \
+	   protocol mpls_uc                         \
+	   flower mpls_label 102                    \
+	   action vlan pop_eth                      \
+	   action mpls pop protocol teb             \
+	   action mirred egress redirect dev $edge2
+	ip link set dev $mpls2 up
+}
+
+ler2_destroy()
+{
+	ip link set dev $mpls2 down
+	tc qdisc del dev $mpls2 ingress
+
+	ip link set dev $edge2 down
+	tc qdisc del dev $edge2 ingress
+}
+
+mpls_forward_eth()
+{
+	ping_test $h1 192.0.2.2
+	ping6_test $h1 2001:db8::2
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	edge1=${NETIFS[p2]}
+
+	mpls1=${NETIFS[p3]}
+	mpls2=${NETIFS[p4]}
+
+	edge2=${NETIFS[p5]}
+	h2=${NETIFS[p6]}
+
+	mpls1mac=$(mac_get $mpls1)
+	mpls2mac=$(mac_get $mpls2)
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+	ler1_create
+	ler2_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ler2_destroy
+	ler1_destroy
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+	log_info "Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	tests_run
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_police.sh b/tools/testing/selftests/net/forwarding/tc_police.sh
index 160f9cccdfb7..5103f64a71d6 100755
--- a/tools/testing/selftests/net/forwarding/tc_police.sh
+++ b/tools/testing/selftests/net/forwarding/tc_police.sh
@@ -35,6 +35,10 @@ ALL_TESTS="
 	police_shared_test
 	police_rx_mirror_test
 	police_tx_mirror_test
+	police_pps_rx_test
+	police_pps_tx_test
+	police_mtu_rx_test
+	police_mtu_tx_test
 "
 NUM_NETIFS=6
 source tc_common.sh
@@ -136,7 +140,7 @@ police_common_test()
 	sleep 10
 	local t1=$(tc_rule_stats_get $h2 1 ingress .bytes)
 
-	local er=$((80 * 1000 * 1000))
+	local er=$((10 * 1000 * 1000))
 	local nr=$(rate $t0 $t1 10)
 	local nr_pct=$((100 * (nr - er) / er))
 	((-10 <= nr_pct && nr_pct <= 10))
@@ -153,7 +157,7 @@ police_rx_test()
 	# Rule to police traffic destined to $h2 on ingress of $rp1
 	tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
 		dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
-		action police rate 80mbit burst 16k conform-exceed drop/ok
+		action police rate 10mbit burst 16k conform-exceed drop/ok
 
 	police_common_test "police on rx"
 
@@ -165,7 +169,7 @@ police_tx_test()
 	# Rule to police traffic destined to $h2 on egress of $rp2
 	tc filter add dev $rp2 egress protocol ip pref 1 handle 101 flower \
 		dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
-		action police rate 80mbit burst 16k conform-exceed drop/ok
+		action police rate 10mbit burst 16k conform-exceed drop/ok
 
 	police_common_test "police on tx"
 
@@ -186,7 +190,7 @@ police_shared_common_test()
 	sleep 10
 	local t1=$(tc_rule_stats_get $h2 1 ingress .bytes)
 
-	local er=$((80 * 1000 * 1000))
+	local er=$((10 * 1000 * 1000))
 	local nr=$(rate $t0 $t1 10)
 	local nr_pct=$((100 * (nr - er) / er))
 	((-10 <= nr_pct && nr_pct <= 10))
@@ -207,7 +211,7 @@ police_shared_test()
 	# Rule to police traffic destined to $h2 on ingress of $rp1
 	tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
 		dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
-		action police rate 80mbit burst 16k conform-exceed drop/ok \
+		action police rate 10mbit burst 16k conform-exceed drop/ok \
 		index 10
 
 	# Rule to police a different flow destined to $h2 on egress of $rp2
@@ -246,7 +250,7 @@ police_mirror_common_test()
 	# Rule to police traffic destined to $h2 and mirror to $h3
 	tc filter add dev $pol_if $dir protocol ip pref 1 handle 101 flower \
 		dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
-		action police rate 80mbit burst 16k conform-exceed drop/pipe \
+		action police rate 10mbit burst 16k conform-exceed drop/pipe \
 		action mirred egress mirror dev $rp3
 
 	mausezahn $h1 -a own -b $(mac_get $rp1) -A 192.0.2.1 -B 198.51.100.1 \
@@ -256,7 +260,7 @@ police_mirror_common_test()
 	sleep 10
 	local t1=$(tc_rule_stats_get $h2 1 ingress .bytes)
 
-	local er=$((80 * 1000 * 1000))
+	local er=$((10 * 1000 * 1000))
 	local nr=$(rate $t0 $t1 10)
 	local nr_pct=$((100 * (nr - er) / er))
 	((-10 <= nr_pct && nr_pct <= 10))
@@ -266,7 +270,7 @@ police_mirror_common_test()
 	sleep 10
 	local t1=$(tc_rule_stats_get $h3 1 ingress .bytes)
 
-	local er=$((80 * 1000 * 1000))
+	local er=$((10 * 1000 * 1000))
 	local nr=$(rate $t0 $t1 10)
 	local nr_pct=$((100 * (nr - er) / er))
 	((-10 <= nr_pct && nr_pct <= 10))
@@ -290,6 +294,110 @@ police_tx_mirror_test()
 	police_mirror_common_test $rp2 egress "police tx and mirror"
 }
 
+police_pps_common_test()
+{
+	local test_name=$1; shift
+
+	RET=0
+
+	# Rule to measure bandwidth on ingress of $h2
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+		action drop
+
+	mausezahn $h1 -a own -b $(mac_get $rp1) -A 192.0.2.1 -B 198.51.100.1 \
+		-t udp sp=12345,dp=54321 -p 1000 -c 0 -q &
+
+	local t0=$(tc_rule_stats_get $h2 1 ingress .packets)
+	sleep 10
+	local t1=$(tc_rule_stats_get $h2 1 ingress .packets)
+
+	local er=$((2000))
+	local nr=$(packets_rate $t0 $t1 10)
+	local nr_pct=$((100 * (nr - er) / er))
+	((-10 <= nr_pct && nr_pct <= 10))
+	check_err $? "Expected rate $(humanize $er), got $(humanize $nr), which is $nr_pct% off. Required accuracy is +-10%."
+
+	log_test "$test_name"
+
+	{ kill %% && wait %%; } 2>/dev/null
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+}
+
+police_pps_rx_test()
+{
+	# Rule to police traffic destined to $h2 on ingress of $rp1
+	tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
+		dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+		action police pkts_rate 2000 pkts_burst 400 conform-exceed drop/ok
+
+	police_pps_common_test "police pps on rx"
+
+	tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 flower
+}
+
+police_pps_tx_test()
+{
+	# Rule to police traffic destined to $h2 on egress of $rp2
+	tc filter add dev $rp2 egress protocol ip pref 1 handle 101 flower \
+		dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+		action police pkts_rate 2000 pkts_burst 400 conform-exceed drop/ok
+
+	police_pps_common_test "police pps on tx"
+
+	tc filter del dev $rp2 egress protocol ip pref 1 handle 101 flower
+}
+
+police_mtu_common_test() {
+	RET=0
+
+	local test_name=$1; shift
+	local dev=$1; shift
+	local direction=$1; shift
+
+	tc filter add dev $dev $direction protocol ip pref 1 handle 101 flower \
+		dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+		action police mtu 1042 conform-exceed drop/ok
+
+	# to count "conform" packets
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+		action drop
+
+	mausezahn $h1 -a own -b $(mac_get $rp1) -A 192.0.2.1 -B 198.51.100.1 \
+		-t udp sp=12345,dp=54321 -p 1001 -c 10 -q
+
+	mausezahn $h1 -a own -b $(mac_get $rp1) -A 192.0.2.1 -B 198.51.100.1 \
+		-t udp sp=12345,dp=54321 -p 1000 -c 3 -q
+
+	tc_check_packets "dev $dev $direction" 101 13
+	check_err $? "wrong packet counter"
+
+	# "exceed" packets
+	local overlimits_t0=$(tc_rule_stats_get ${dev} 1 ${direction} .overlimits)
+	test ${overlimits_t0} = 10
+	check_err $? "wrong overlimits, expected 10 got ${overlimits_t0}"
+
+	# "conform" packets
+	tc_check_packets "dev $h2 ingress" 101 3
+	check_err $? "forwarding error"
+
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+	tc filter del dev $dev $direction protocol ip pref 1 handle 101 flower
+
+	log_test "$test_name"
+}
+
+police_mtu_rx_test()
+{
+	police_mtu_common_test "police mtu (rx)" $rp1 ingress
+}
+
+police_mtu_tx_test()
+{
+	police_mtu_common_test "police mtu (tx)" $rp2 egress
+}
+
 setup_prepare()
 {
 	h1=${NETIFS[p1]}
diff --git a/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh b/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh
new file mode 100755
index 000000000000..5a5dd9034819
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh
@@ -0,0 +1,164 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+ALL_TESTS="tunnel_key_nofrag_test"
+
+NUM_NETIFS=4
+source tc_common.sh
+source lib.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24
+	forwarding_enable
+	mtu_set $h1 1500
+	tunnel_create h1-et vxlan 192.0.2.1 192.0.2.2 dev $h1 dstport 0 external
+	tc qdisc add dev h1-et clsact
+	mtu_set h1-et 1230
+	mtu_restore $h1
+	mtu_set $h1 1000
+}
+
+h1_destroy()
+{
+	tc qdisc del dev h1-et clsact
+	tunnel_destroy h1-et
+	forwarding_restore
+	mtu_restore $h1
+	simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.2/24
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2 192.0.2.2/24
+}
+
+switch_create()
+{
+	simple_if_init $swp1 192.0.2.2/24
+	tc qdisc add dev $swp1 clsact
+	simple_if_init $swp2 192.0.2.1/24
+}
+
+switch_destroy()
+{
+	simple_if_fini $swp2 192.0.2.1/24
+	tc qdisc del dev $swp1 clsact
+	simple_if_fini $swp1 192.0.2.2/24
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	h1mac=$(mac_get $h1)
+	h2mac=$(mac_get $h2)
+
+	swp1origmac=$(mac_get $swp1)
+	swp2origmac=$(mac_get $swp2)
+	ip link set $swp1 address $h2mac
+	ip link set $swp2 address $h1mac
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+	switch_create
+
+	if ! tc action add action tunnel_key help 2>&1 | grep -q nofrag; then
+		log_test "SKIP: iproute doesn't support nofrag"
+		exit $ksft_skip
+	fi
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+
+	ip link set $swp2 address $swp2origmac
+	ip link set $swp1 address $swp1origmac
+}
+
+tunnel_key_nofrag_test()
+{
+	RET=0
+	local i
+
+	tc filter add dev $swp1 ingress protocol ip pref 100 handle 100 \
+		flower src_ip 192.0.2.1 dst_ip 192.0.2.2 ip_proto udp \
+		ip_flags nofrag action drop
+	tc filter add dev $swp1 ingress protocol ip pref 101 handle 101 \
+		flower src_ip 192.0.2.1 dst_ip 192.0.2.2 ip_proto udp \
+		ip_flags firstfrag action drop
+	tc filter add dev $swp1 ingress protocol ip pref 102 handle 102 \
+		flower src_ip 192.0.2.1 dst_ip 192.0.2.2 ip_proto udp \
+		ip_flags nofirstfrag action drop
+
+	# test 'nofrag' set
+	tc filter add dev h1-et egress protocol all pref 1 handle 1 matchall $tcflags \
+		action tunnel_key set src_ip 192.0.2.1 dst_ip 192.0.2.2 id 42 nofrag index 10
+	$MZ h1-et -c 1 -p 930 -a 00:aa:bb:cc:dd:ee -b 00:ee:dd:cc:bb:aa -t ip -q
+	tc_check_packets "dev $swp1 ingress" 100 1
+	check_err $? "packet smaller than MTU was not tunneled"
+
+	$MZ h1-et -c 1 -p 931 -a 00:aa:bb:cc:dd:ee -b 00:ee:dd:cc:bb:aa -t ip -q
+	tc_check_packets "dev $swp1 ingress" 100 1
+	check_err $? "packet bigger than MTU matched nofrag (nofrag was set)"
+	tc_check_packets "dev $swp1 ingress" 101 0
+	check_err $? "packet bigger than MTU matched firstfrag (nofrag was set)"
+	tc_check_packets "dev $swp1 ingress" 102 0
+	check_err $? "packet bigger than MTU matched nofirstfrag (nofrag was set)"
+
+	# test 'nofrag' cleared
+	tc actions change action tunnel_key set src_ip 192.0.2.1 dst_ip 192.0.2.2 id 42 index 10
+	$MZ h1-et -c 1 -p 931 -a 00:aa:bb:cc:dd:ee -b 00:ee:dd:cc:bb:aa -t ip -q
+	tc_check_packets "dev $swp1  ingress" 100 1
+	check_err $? "packet bigger than MTU matched nofrag (nofrag was unset)"
+	tc_check_packets "dev $swp1  ingress" 101 1
+	check_err $? "packet bigger than MTU didn't match firstfrag (nofrag was unset) "
+	tc_check_packets "dev $swp1 ingress" 102 1
+	check_err $? "packet bigger than MTU didn't match nofirstfrag (nofrag was unset) "
+
+	for i in 100 101 102; do
+		tc filter del dev $swp1 ingress protocol ip pref $i handle $i flower
+	done
+	tc filter del dev h1-et egress pref 1 handle 1 matchall
+
+	log_test "tunnel_key nofrag ($tcflags)"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+	log_info "Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	tests_run
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tsn_lib.sh b/tools/testing/selftests/net/forwarding/tsn_lib.sh
new file mode 100644
index 000000000000..b91bcd8008a9
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tsn_lib.sh
@@ -0,0 +1,249 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright 2021-2022 NXP
+
+REQUIRE_ISOCHRON=${REQUIRE_ISOCHRON:=yes}
+REQUIRE_LINUXPTP=${REQUIRE_LINUXPTP:=yes}
+
+# Tunables
+UTC_TAI_OFFSET=37
+ISOCHRON_CPU=1
+
+if [[ "$REQUIRE_ISOCHRON" = "yes" ]]; then
+	# https://github.com/vladimiroltean/tsn-scripts
+	# WARNING: isochron versions pre-1.0 are unstable,
+	# always use the latest version
+	require_command isochron
+fi
+if [[ "$REQUIRE_LINUXPTP" = "yes" ]]; then
+	require_command phc2sys
+	require_command ptp4l
+fi
+
+phc2sys_start()
+{
+	local uds_address=$1
+	local extra_args=""
+
+	if ! [ -z "${uds_address}" ]; then
+		extra_args="${extra_args} -z ${uds_address}"
+	fi
+
+	phc2sys_log="$(mktemp)"
+
+	chrt -f 10 phc2sys -m \
+		-a -rr \
+		--step_threshold 0.00002 \
+		--first_step_threshold 0.00002 \
+		${extra_args} \
+		> "${phc2sys_log}" 2>&1 &
+	phc2sys_pid=$!
+
+	echo "phc2sys logs to ${phc2sys_log} and has pid ${phc2sys_pid}"
+
+	sleep 1
+}
+
+phc2sys_stop()
+{
+	{ kill ${phc2sys_pid} && wait ${phc2sys_pid}; } 2> /dev/null
+	rm "${phc2sys_log}" 2> /dev/null
+}
+
+# Replace space separators from interface list with underscores
+if_names_to_label()
+{
+	local if_name_list="$1"
+
+	echo "${if_name_list/ /_}"
+}
+
+ptp4l_start()
+{
+	local if_names="$1"
+	local slave_only=$2
+	local uds_address=$3
+	local log="ptp4l_log_$(if_names_to_label ${if_names})"
+	local pid="ptp4l_pid_$(if_names_to_label ${if_names})"
+	local extra_args=""
+
+	for if_name in ${if_names}; do
+		extra_args="${extra_args} -i ${if_name}"
+	done
+
+	if [ "${slave_only}" = true ]; then
+		extra_args="${extra_args} -s"
+	fi
+
+	# declare dynamic variables ptp4l_log_${if_name} and ptp4l_pid_${if_name}
+	# as global, so that they can be referenced later
+	declare -g "${log}=$(mktemp)"
+
+	chrt -f 10 ptp4l -m -2 -P \
+		--step_threshold 0.00002 \
+		--first_step_threshold 0.00002 \
+		--tx_timestamp_timeout 100 \
+		--uds_address="${uds_address}" \
+		${extra_args} \
+		> "${!log}" 2>&1 &
+	declare -g "${pid}=$!"
+
+	echo "ptp4l for interfaces ${if_names} logs to ${!log} and has pid ${!pid}"
+
+	sleep 1
+}
+
+ptp4l_stop()
+{
+	local if_names="$1"
+	local log="ptp4l_log_$(if_names_to_label ${if_names})"
+	local pid="ptp4l_pid_$(if_names_to_label ${if_names})"
+
+	{ kill ${!pid} && wait ${!pid}; } 2> /dev/null
+	rm "${!log}" 2> /dev/null
+}
+
+cpufreq_max()
+{
+	local cpu=$1
+	local freq="cpu${cpu}_freq"
+	local governor="cpu${cpu}_governor"
+
+	# Kernel may be compiled with CONFIG_CPU_FREQ disabled
+	if ! [ -d /sys/bus/cpu/devices/cpu${cpu}/cpufreq ]; then
+		return
+	fi
+
+	# declare dynamic variables cpu${cpu}_freq and cpu${cpu}_governor as
+	# global, so they can be referenced later
+	declare -g "${freq}=$(cat /sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_min_freq)"
+	declare -g "${governor}=$(cat /sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_governor)"
+
+	cat /sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_max_freq > \
+		/sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_min_freq
+	echo -n "performance" > \
+		/sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_governor
+}
+
+cpufreq_restore()
+{
+	local cpu=$1
+	local freq="cpu${cpu}_freq"
+	local governor="cpu${cpu}_governor"
+
+	if ! [ -d /sys/bus/cpu/devices/cpu${cpu}/cpufreq ]; then
+		return
+	fi
+
+	echo "${!freq}" > /sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_min_freq
+	echo -n "${!governor}" > \
+		/sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_governor
+}
+
+isochron_recv_start()
+{
+	local if_name=$1
+	local uds=$2
+	local stats_port=$3
+	local extra_args=$4
+	local pid="isochron_pid_${stats_port}"
+
+	if ! [ -z "${uds}" ]; then
+		extra_args="${extra_args} --unix-domain-socket ${uds}"
+	fi
+
+	isochron rcv \
+		--interface ${if_name} \
+		--sched-priority 98 \
+		--sched-fifo \
+		--utc-tai-offset ${UTC_TAI_OFFSET} \
+		--stats-port ${stats_port} \
+		--quiet \
+		${extra_args} & \
+	declare -g "${pid}=$!"
+
+	sleep 1
+}
+
+isochron_recv_stop()
+{
+	local stats_port=$1
+	local pid="isochron_pid_${stats_port}"
+
+	{ kill ${!pid} && wait ${!pid}; } 2> /dev/null
+}
+
+isochron_do()
+{
+	local sender_if_name=$1; shift
+	local receiver_if_name=$1; shift
+	local sender_uds=$1; shift
+	local receiver_uds=$1; shift
+	local base_time=$1; shift
+	local cycle_time=$1; shift
+	local shift_time=$1; shift
+	local num_pkts=$1; shift
+	local vid=$1; shift
+	local priority=$1; shift
+	local dst_ip=$1; shift
+	local isochron_dat=$1; shift
+	local extra_args=""
+	local receiver_extra_args=""
+	local vrf="$(master_name_get ${sender_if_name})"
+	local use_l2="true"
+
+	if ! [ -z "${dst_ip}" ]; then
+		use_l2="false"
+	fi
+
+	if ! [ -z "${vrf}" ]; then
+		dst_ip="${dst_ip}%${vrf}"
+	fi
+
+	if ! [ -z "${vid}" ]; then
+		vid="--vid=${vid}"
+	fi
+
+	if [ -z "${receiver_uds}" ]; then
+		extra_args="${extra_args} --omit-remote-sync"
+	fi
+
+	if ! [ -z "${shift_time}" ]; then
+		extra_args="${extra_args} --shift-time=${shift_time}"
+	fi
+
+	if [ "${use_l2}" = "true" ]; then
+		extra_args="${extra_args} --l2 --etype=0xdead ${vid}"
+		receiver_extra_args="--l2 --etype=0xdead"
+	else
+		extra_args="${extra_args} --l4 --ip-destination=${dst_ip}"
+		receiver_extra_args="--l4"
+	fi
+
+	cpufreq_max ${ISOCHRON_CPU}
+
+	isochron_recv_start "${h2}" "${receiver_uds}" 5000 "${receiver_extra_args}"
+
+	isochron send \
+		--interface ${sender_if_name} \
+		--unix-domain-socket ${sender_uds} \
+		--priority ${priority} \
+		--base-time ${base_time} \
+		--cycle-time ${cycle_time} \
+		--num-frames ${num_pkts} \
+		--frame-size 64 \
+		--txtime \
+		--utc-tai-offset ${UTC_TAI_OFFSET} \
+		--cpu-mask $((1 << ${ISOCHRON_CPU})) \
+		--sched-fifo \
+		--sched-priority 98 \
+		--client 127.0.0.1 \
+		--sync-threshold 5000 \
+		--output-file ${isochron_dat} \
+		${extra_args} \
+		--quiet
+
+	isochron_recv_stop 5000
+
+	cpufreq_restore ${ISOCHRON_CPU}
+}
diff --git a/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh b/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh
index 0727e2012b68..43469c7de118 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh
@@ -525,7 +525,7 @@ arp_suppression()
 
 	log_test "neigh_suppress: on / neigh exists: yes"
 
-	# Delete the neighbour from the the SVI. A single ARP request should be
+	# Delete the neighbour from the SVI. A single ARP request should be
 	# received by the remote VTEP
 	RET=0
 
diff --git a/tools/testing/selftests/net/forwarding/vxlan_asymmetric_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_asymmetric_ipv6.sh
new file mode 100755
index 000000000000..f4930098974f
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/vxlan_asymmetric_ipv6.sh
@@ -0,0 +1,504 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +--------------------------------+            +-----------------------------+
+# |                         vrf-h1 |            |                      vrf-h2 |
+# |    + $h1                       |            | + $h2                       |
+# |    | 2001:db8:1::1/64          |            | | 2001:db8:2::1/64          |
+# |    | default via 2001:db8:1::3 |            | | default via 2001:db8:2::3 |
+# +----|---------------------------+            +-|---------------------------+
+#      |                                          |
+# +----|------------------------------------------|---------------------------+
+# | SW |                                          |                           |
+# | +--|------------------------------------------|-------------------------+ |
+# | |  + $swp1                         br1        + $swp2                   | |
+# | |     vid 10 pvid untagged                       vid 20 pvid untagged   | |
+# | |                                                                       | |
+# | |  + vx10                                     + vx20                    | |
+# | |    local 2001:db8:3::1                        local 2001:db8:3::1     | |
+# | |    remote 2001:db8:3::2                       remote 2001:db8:3::2    | |
+# | |    id 1000                                    id 2000                 | |
+# | |    dstport 4789                               dstport 4789            | |
+# | |    vid 10 pvid untagged                       vid 20 pvid untagged    | |
+# | |                                                                       | |
+# | +-----------------------------------+-----------------------------------+ |
+# |                                     |                                     |
+# | +-----------------------------------|-----------------------------------+ |
+# | |                                   |                                   | |
+# | |  +--------------------------------+--------------------------------+  | |
+# | |  |                                                                 |  | |
+# | |  + vlan10                                                   vlan20 +  | |
+# | |  | 2001:db8:1::2/64                               2001:db8:2::2/64 |  | |
+# | |  |                                                                 |  | |
+# | |  + vlan10-v (macvlan)                           vlan20-v (macvlan) +  | |
+# | |    2001:db8:1::3/64                               2001:db8:2::3/64    | |
+# | |    00:00:5e:00:01:01                             00:00:5e:00:01:01    | |
+# | |                               vrf-green                               | |
+# | +-----------------------------------------------------------------------+ |
+# |                                                                           |
+# |    + $rp1                                       +lo                       |
+# |    | 2001:db8:4::1/64                            2001:db8:3::1/128        |
+# +----|----------------------------------------------------------------------+
+#      |
+# +----|--------------------------------------------------------+
+# |    |                            vrf-spine                   |
+# |    + $rp2                                                   |
+# |      2001:db8:4::2/64                                       |
+# |                                                             |   (maybe) HW
+# =============================================================================
+# |                                                             |  (likely) SW
+# |                                                             |
+# |    + v1 (veth)                                              |
+# |    | 2001:db8:5::2/64                                       |
+# +----|--------------------------------------------------------+
+#      |
+# +----|----------------------------------------------------------------------+
+# |    + v2 (veth)                                  +lo           NS1 (netns) |
+# |      2001:db8:5::1/64                            2001:db8:3::2/128        |
+# |                                                                           |
+# | +-----------------------------------------------------------------------+ |
+# | |                               vrf-green                               | |
+# | |  + vlan10-v (macvlan)                           vlan20-v (macvlan) +  | |
+# | |  | 2001:db8:1::3/64                               2001:db8:2::3/64 |  | |
+# | |  | 00:00:5e:00:01:01                             00:00:5e:00:01:01 |  | |
+# | |  |                                                                 |  | |
+# | |  + vlan10                                                   vlan20 +  | |
+# | |  | 2001:db8:1::3/64                               2001:db8:2::3/64 |  | |
+# | |  |                                                                 |  | |
+# | |  +--------------------------------+--------------------------------+  | |
+# | |                                   |                                   | |
+# | +-----------------------------------|-----------------------------------+ |
+# |                                     |                                     |
+# | +-----------------------------------+-----------------------------------+ |
+# | |                                                                       | |
+# | |  + vx10                                     + vx20                    | |
+# | |    local 2001:db8:3::2                        local 2001:db8:3::2     | |
+# | |    remote 2001:db8:3::1                       remote 2001:db8:3::1    | |
+# | |    id 1000                                    id 2000                 | |
+# | |    dstport 4789                               dstport 4789            | |
+# | |    vid 10 pvid untagged                       vid 20 pvid untagged    | |
+# | |                                                                       | |
+# | |  + w1 (veth)                                + w3 (veth)               | |
+# | |  | vid 10 pvid untagged          br1        | vid 20 pvid untagged    | |
+# | +--|------------------------------------------|-------------------------+ |
+# |    |                                          |                           |
+# |    |                                          |                           |
+# | +--|----------------------+                +--|-------------------------+ |
+# | |  |               vrf-h1 |                |  |                  vrf-h2 | |
+# | |  + w2 (veth)            |                |  + w4 (veth)               | |
+# | |    2001:db8:1::4/64     |                |    2001:db8:2::4/64        | |
+# | |    default via          |                |    default via             | |
+# | |    2001:db8:1::3/64     |                |    2001:db8:2::3/64        | |
+# | +-------------------------+                +----------------------------+ |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+	ping_ipv6
+	arp_decap
+"
+NUM_NETIFS=6
+source lib.sh
+
+require_command $ARPING
+
+hx_create()
+{
+	local vrf_name=$1; shift
+	local if_name=$1; shift
+	local ip_addr=$1; shift
+	local gw_ip=$1; shift
+
+	vrf_create $vrf_name
+	ip link set dev $if_name master $vrf_name
+	ip link set dev $vrf_name up
+	ip link set dev $if_name up
+
+	ip address add $ip_addr/64 dev $if_name
+	ip neigh replace $gw_ip lladdr 00:00:5e:00:01:01 nud permanent \
+		dev $if_name
+	ip route add default vrf $vrf_name nexthop via $gw_ip
+}
+export -f hx_create
+
+hx_destroy()
+{
+	local vrf_name=$1; shift
+	local if_name=$1; shift
+	local ip_addr=$1; shift
+	local gw_ip=$1; shift
+
+	ip route del default vrf $vrf_name nexthop via $gw_ip
+	ip neigh del $gw_ip dev $if_name
+	ip address del $ip_addr/64 dev $if_name
+
+	ip link set dev $if_name down
+	vrf_destroy $vrf_name
+}
+
+h1_create()
+{
+	hx_create "vrf-h1" $h1 2001:db8:1::1 2001:db8:1::3
+}
+
+h1_destroy()
+{
+	hx_destroy "vrf-h1" $h1 2001:db8:1::1 2001:db8:1::3
+}
+
+h2_create()
+{
+	hx_create "vrf-h2" $h2 2001:db8:2::1 2001:db8:2::3
+}
+
+h2_destroy()
+{
+	hx_destroy "vrf-h2" $h2 2001:db8:2::1 2001:db8:2::3
+}
+
+switch_create()
+{
+	ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 \
+		mcast_snooping 0
+	# Make sure the bridge uses the MAC address of the local port and not
+	# that of the VxLAN's device.
+	ip link set dev br1 address $(mac_get $swp1)
+	ip link set dev br1 up
+
+	ip link set dev $rp1 up
+	ip address add dev $rp1 2001:db8:4::1/64
+	ip route add 2001:db8:3::2/128 nexthop via 2001:db8:4::2
+
+	ip link add name vx10 type vxlan id 1000		\
+		local 2001:db8:3::1 remote 2001:db8:3::2 dstport 4789	\
+		nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100
+	ip link set dev vx10 up
+
+	ip link set dev vx10 master br1
+	bridge vlan add vid 10 dev vx10 pvid untagged
+
+	ip link add name vx20 type vxlan id 2000		\
+		local 2001:db8:3::1 remote 2001:db8:3::2 dstport 4789	\
+		nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100
+	ip link set dev vx20 up
+
+	ip link set dev vx20 master br1
+	bridge vlan add vid 20 dev vx20 pvid untagged
+
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+	bridge vlan add vid 10 dev $swp1 pvid untagged
+
+	ip link set dev $swp2 master br1
+	ip link set dev $swp2 up
+	bridge vlan add vid 20 dev $swp2 pvid untagged
+
+	ip address add 2001:db8:3::1/128 dev lo
+
+	# Create SVIs
+	vrf_create "vrf-green"
+	ip link set dev vrf-green up
+
+	ip link add link br1 name vlan10 up master vrf-green type vlan id 10
+	ip address add 2001:db8:1::2/64 dev vlan10
+	ip link add link vlan10 name vlan10-v up master vrf-green \
+		address 00:00:5e:00:01:01 type macvlan mode private
+	ip address add 2001:db8:1::3/64 dev vlan10-v
+
+	ip link add link br1 name vlan20 up master vrf-green type vlan id 20
+	ip address add 2001:db8:2::2/64 dev vlan20
+	ip link add link vlan20 name vlan20-v up master vrf-green \
+		address 00:00:5e:00:01:01 type macvlan mode private
+	ip address add 2001:db8:2::3/64 dev vlan20-v
+
+	bridge vlan add vid 10 dev br1 self
+	bridge vlan add vid 20 dev br1 self
+
+	bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10
+	bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20
+
+}
+
+switch_destroy()
+{
+	bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 20
+	bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 10
+
+	bridge vlan del vid 20 dev br1 self
+	bridge vlan del vid 10 dev br1 self
+
+	ip link del dev vlan20
+
+	ip link del dev vlan10
+
+	vrf_destroy "vrf-green"
+
+	ip address del 2001:db8:3::1/128 dev lo
+
+	bridge vlan del vid 20 dev $swp2
+	ip link set dev $swp2 down
+	ip link set dev $swp2 nomaster
+
+	bridge vlan del vid 10 dev $swp1
+	ip link set dev $swp1 down
+	ip link set dev $swp1 nomaster
+
+	bridge vlan del vid 20 dev vx20
+	ip link set dev vx20 nomaster
+
+	ip link set dev vx20 down
+	ip link del dev vx20
+
+	bridge vlan del vid 10 dev vx10
+	ip link set dev vx10 nomaster
+
+	ip link set dev vx10 down
+	ip link del dev vx10
+
+	ip route del 2001:db8:3::2 nexthop via 2001:db8:4::2
+	ip address del dev $rp1 2001:db8:4::1/64
+	ip link set dev $rp1 down
+
+	ip link set dev br1 down
+	ip link del dev br1
+}
+
+spine_create()
+{
+	vrf_create "vrf-spine"
+	ip link set dev $rp2 master vrf-spine
+	ip link set dev v1 master vrf-spine
+	ip link set dev vrf-spine up
+	ip link set dev $rp2 up
+	ip link set dev v1 up
+
+	ip address add 2001:db8:4::2/64 dev $rp2
+	ip address add 2001:db8:5::2/64 dev v1
+
+	ip route add 2001:db8:3::1/128 vrf vrf-spine nexthop via \
+		2001:db8:4::1
+	ip route add 2001:db8:3::2/128 vrf vrf-spine nexthop via \
+		2001:db8:5::1
+}
+
+spine_destroy()
+{
+	ip route del 2001:db8:3::2/128 vrf vrf-spine nexthop via \
+		2001:db8:5::1
+	ip route del 2001:db8:3::1/128 vrf vrf-spine nexthop via \
+		2001:db8:4::1
+
+	ip address del 2001:db8:5::2/64 dev v1
+	ip address del 2001:db8:4::2/64 dev $rp2
+
+	ip link set dev v1 down
+	ip link set dev $rp2 down
+	vrf_destroy "vrf-spine"
+}
+
+ns_h1_create()
+{
+	hx_create "vrf-h1" w2 2001:db8:1::4 2001:db8:1::3
+}
+export -f ns_h1_create
+
+ns_h2_create()
+{
+	hx_create "vrf-h2" w4 2001:db8:2::4 2001:db8:2::3
+}
+export -f ns_h2_create
+
+ns_switch_create()
+{
+	ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 \
+		mcast_snooping 0
+	ip link set dev br1 up
+
+	ip link set dev v2 up
+	ip address add dev v2 2001:db8:5::1/64
+	ip route add 2001:db8:3::1 nexthop via 2001:db8:5::2
+
+	ip link add name vx10 type vxlan id 1000		\
+		local 2001:db8:3::2 remote 2001:db8:3::1 dstport 4789	\
+		nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100
+	ip link set dev vx10 up
+
+	ip link set dev vx10 master br1
+	bridge vlan add vid 10 dev vx10 pvid untagged
+
+	ip link add name vx20 type vxlan id 2000		\
+		local 2001:db8:3::2 remote 2001:db8:3::1 dstport 4789	\
+		nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100
+	ip link set dev vx20 up
+
+	ip link set dev vx20 master br1
+	bridge vlan add vid 20 dev vx20 pvid untagged
+
+	ip link set dev w1 master br1
+	ip link set dev w1 up
+	bridge vlan add vid 10 dev w1 pvid untagged
+
+	ip link set dev w3 master br1
+	ip link set dev w3 up
+	bridge vlan add vid 20 dev w3 pvid untagged
+
+	ip address add 2001:db8:3::2/128 dev lo
+
+	# Create SVIs
+	vrf_create "vrf-green"
+	ip link set dev vrf-green up
+
+	ip link add link br1 name vlan10 up master vrf-green type vlan id 10
+	ip address add 2001:db8:1::3/64 dev vlan10
+	ip link add link vlan10 name vlan10-v up master vrf-green \
+		address 00:00:5e:00:01:01 type macvlan mode private
+	ip address add 2001:db8:1::3/64 dev vlan10-v
+
+	ip link add link br1 name vlan20 up master vrf-green type vlan id 20
+	ip address add 2001:db8:2::3/64 dev vlan20
+	ip link add link vlan20 name vlan20-v up master vrf-green \
+		address 00:00:5e:00:01:01 type macvlan mode private
+	ip address add 2001:db8:2::3/64 dev vlan20-v
+
+	bridge vlan add vid 10 dev br1 self
+	bridge vlan add vid 20 dev br1 self
+
+	bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10
+	bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20
+}
+export -f ns_switch_create
+
+ns_init()
+{
+	ip link add name w1 type veth peer name w2
+	ip link add name w3 type veth peer name w4
+
+	ip link set dev lo up
+
+	ns_h1_create
+	ns_h2_create
+	ns_switch_create
+}
+export -f ns_init
+
+ns1_create()
+{
+	ip netns add ns1
+	ip link set dev v2 netns ns1
+	in_ns ns1 ns_init
+}
+
+ns1_destroy()
+{
+	ip netns exec ns1 ip link set dev v2 netns 1
+	ip netns del ns1
+}
+
+macs_populate()
+{
+	local mac1=$1; shift
+	local mac2=$1; shift
+	local ip1=$1; shift
+	local ip2=$1; shift
+	local dst=$1; shift
+
+	bridge fdb add $mac1 dev vx10 self master extern_learn static \
+		dst $dst vlan 10
+	bridge fdb add $mac2 dev vx20 self master extern_learn static \
+		dst $dst vlan 20
+
+	ip neigh add $ip1 lladdr $mac1 nud noarp dev vlan10 \
+		extern_learn
+	ip neigh add $ip2 lladdr $mac2 nud noarp dev vlan20 \
+		extern_learn
+}
+export -f macs_populate
+
+macs_initialize()
+{
+	local h1_ns_mac=$(in_ns ns1 mac_get w2)
+	local h2_ns_mac=$(in_ns ns1 mac_get w4)
+	local h1_mac=$(mac_get $h1)
+	local h2_mac=$(mac_get $h2)
+
+	macs_populate $h1_ns_mac $h2_ns_mac 2001:db8:1::4 2001:db8:2::4 \
+		2001:db8:3::2
+	in_ns ns1 macs_populate $h1_mac $h2_mac 2001:db8:1::1 2001:db8:2::1 \
+		2001:db8:3::1
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	rp1=${NETIFS[p5]}
+	rp2=${NETIFS[p6]}
+
+	vrf_prepare
+	forwarding_enable
+
+	h1_create
+	h2_create
+	switch_create
+
+	ip link add name v1 type veth peer name v2
+	spine_create
+	ns1_create
+	in_ns ns1 forwarding_enable
+
+	macs_initialize
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ns1_destroy
+	spine_destroy
+	ip link del dev v1
+
+	switch_destroy
+	h2_destroy
+	h1_destroy
+
+	forwarding_restore
+	vrf_cleanup
+}
+
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:2::1 ": local->local vid 10->vid 20"
+	ping6_test $h1 2001:db8:1::4 ": local->remote vid 10->vid 10"
+	ping6_test $h2 2001:db8:2::4 ": local->remote vid 20->vid 20"
+	ping6_test $h1 2001:db8:2::4 ": local->remote vid 10->vid 20"
+	ping6_test $h2 2001:db8:1::4 ": local->remote vid 20->vid 10"
+}
+
+arp_decap()
+{
+	# Repeat the ping tests, but without populating the neighbours. This
+	# makes sure we correctly decapsulate ARP packets
+	log_info "deleting neighbours from vlan interfaces"
+
+	ip neigh del 2001:db8:1::4 dev vlan10
+	ip neigh del 2001:db8:2::4 dev vlan20
+
+	ping_ipv6
+
+	ip neigh replace 2001:db8:1::4 lladdr $(in_ns ns1 mac_get w2) \
+		nud noarp dev vlan10 extern_learn
+	ip neigh replace 2001:db8:2::4 lladdr $(in_ns ns1 mac_get w4) \
+		nud noarp dev vlan20 extern_learn
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
index ce6bea9675c0..6f0a2e452ba1 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
@@ -495,7 +495,7 @@ vxlan_ping_test()
 	local delta=$((t1 - t0))
 
 	# Tolerate a couple stray extra packets.
-	((expect <= delta && delta <= expect + 2))
+	((expect <= delta && delta <= expect + 5))
 	check_err $? "$capture_dev: Expected to capture $expect packets, got $delta."
 }
 
@@ -532,7 +532,7 @@ __test_ecn_encap()
 	RET=0
 
 	tc filter add dev v1 egress pref 77 prot ip \
-		flower ip_tos $tos action pass
+		flower ip_tos $tos ip_proto udp dst_port $VXPORT action pass
 	sleep 1
 	vxlan_ping_test $h1 192.0.2.3 "-Q $q" v1 egress 77 10
 	tc filter del dev v1 egress pref 77 prot ip
@@ -657,10 +657,21 @@ test_ecn_decap()
 {
 	# In accordance with INET_ECN_decapsulate()
 	__test_ecn_decap 00 00 0x00
+	__test_ecn_decap 00 01 0x00
+	__test_ecn_decap 00 02 0x00
+	# 00 03 is tested in test_ecn_decap_error()
+	__test_ecn_decap 01 00 0x01
 	__test_ecn_decap 01 01 0x01
-	__test_ecn_decap 02 01 0x02
+	__test_ecn_decap 01 02 0x01
 	__test_ecn_decap 01 03 0x03
+	__test_ecn_decap 02 00 0x02
+	__test_ecn_decap 02 01 0x01
+	__test_ecn_decap 02 02 0x02
 	__test_ecn_decap 02 03 0x03
+	__test_ecn_decap 03 00 0x03
+	__test_ecn_decap 03 01 0x03
+	__test_ecn_decap 03 02 0x03
+	__test_ecn_decap 03 03 0x03
 	test_ecn_decap_error
 }
 
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh
new file mode 100755
index 000000000000..a603f7b0a08f
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh
@@ -0,0 +1,804 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------+                          +------------------------+
+# | H1 (vrf)              |                          | H2 (vrf)               |
+# |    + $h1              |                          |    + $h2               |
+# |    | 192.0.2.1/28     |                          |    | 192.0.2.2/28      |
+# |    | 2001:db8:1::1/64 |                          |    | 2001:db8:1::2/64  |
+# +----|------------------+                          +----|-------------------+
+#      |                                                  |
+# +----|--------------------------------------------------|-------------------+
+# | SW |                                                  |                   |
+# | +--|--------------------------------------------------|-----------------+ |
+# | |  + $swp1                   BR1 (802.1d)             + $swp2           | |
+# | |                                                                       | |
+# | |  + vx1 (vxlan)                                                        | |
+# | |    local 2001:db8:3::1                                                | |
+# | |    remote 2001:db8:4::1 2001:db8:5::1                                 | |
+# | |    id 1000 dstport $VXPORT                                            | |
+# | +-----------------------------------------------------------------------+ |
+# |                                                                           |
+# |  2001:db8:4::0/64 via 2001:db8:3::2                                       |
+# |  2001:db8:5::0/64 via 2001:db8:3::2                                       |
+# |                                                                           |
+# |    + $rp1                                                                 |
+# |    | 2001:db8:3::1/64                                                     |
+# +----|----------------------------------------------------------------------+
+#      |
+# +----|----------------------------------------------------------+
+# |    |                                             VRP2 (vrf)   |
+# |    + $rp2                                                     |
+# |      2001:db8:3::2/64                                         |
+# |                                                               |  (maybe) HW
+# =============================================================================
+# |                                                               |  (likely) SW
+# |    + v1 (veth)                             + v3 (veth)        |
+# |    | 2001:db8:4::2/64                      | 2001:db8:5::2/64 |
+# +----|---------------------------------------|------------------+
+#      |                                       |
+# +----|--------------------------------+ +----|-------------------------------+
+# |    + v2 (veth)        NS1 (netns)   | |    + v4 (veth)        NS2 (netns)  |
+# |      2001:db8:4::1/64               | |      2001:db8:5::1/64              |
+# |                                     | |                                    |
+# | 2001:db8:3::0/64 via 2001:db8:4::2  | | 2001:db8:3::0/64 via 2001:db8:5::2 |
+# | 2001:db8:5::1/128 via 2001:db8:4::2 | | 2001:db8:4::1/128 via              |
+# |                                     | |         2001:db8:5::2              |
+# |                                     | |                                    |
+# | +-------------------------------+   | | +-------------------------------+  |
+# | |                  BR2 (802.1d) |   | | |                  BR2 (802.1d) |  |
+# | |  + vx2 (vxlan)                |   | | |  + vx2 (vxlan)                |  |
+# | |    local 2001:db8:4::1        |   | | |    local 2001:db8:5::1        |  |
+# | |    remote 2001:db8:3::1       |   | | |    remote 2001:db8:3::1       |  |
+# | |    remote 2001:db8:5::1       |   | | |    remote 2001:db8:4::1       |  |
+# | |    id 1000 dstport $VXPORT    |   | | |    id 1000 dstport $VXPORT    |  |
+# | |                               |   | | |                               |  |
+# | |  + w1 (veth)                  |   | | |  + w1 (veth)                  |  |
+# | +--|----------------------------+   | | +--|----------------------------+  |
+# |    |                                | |    |                               |
+# | +--|----------------------------+   | | +--|----------------------------+  |
+# | |  + w2 (veth)        VW2 (vrf) |   | | |  + w2 (veth)        VW2 (vrf) |  |
+# | |    192.0.2.3/28               |   | | |    192.0.2.4/28               |  |
+# | |    2001:db8:1::3/64           |   | | |    2001:db8:1::4/64           |  |
+# | +-------------------------------+   | | +-------------------------------+  |
+# +-------------------------------------+ +------------------------------------+
+
+: ${VXPORT:=4789}
+export VXPORT
+
+: ${ALL_TESTS:="
+	ping_ipv4
+	ping_ipv6
+	test_flood
+	test_unicast
+	test_ttl
+	test_tos
+	test_ecn_encap
+	test_ecn_decap
+	reapply_config
+	ping_ipv4
+	ping_ipv6
+	test_flood
+	test_unicast
+"}
+
+NUM_NETIFS=6
+source lib.sh
+source tc_common.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+	tc qdisc add dev $h1 clsact
+}
+
+h1_destroy()
+{
+	tc qdisc del dev $h1 clsact
+	simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.2/28 2001:db8:1::2/64
+	tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+	tc qdisc del dev $h2 clsact
+	simple_if_fini $h2 192.0.2.2/28 2001:db8:1::2/64
+}
+
+rp1_set_addr()
+{
+	ip address add dev $rp1 2001:db8:3::1/64
+
+	ip route add 2001:db8:4::0/64 nexthop via 2001:db8:3::2
+	ip route add 2001:db8:5::0/64 nexthop via 2001:db8:3::2
+}
+
+rp1_unset_addr()
+{
+	ip route del 2001:db8:5::0/64 nexthop via 2001:db8:3::2
+	ip route del 2001:db8:4::0/64 nexthop via 2001:db8:3::2
+
+	ip address del dev $rp1 2001:db8:3::1/64
+}
+
+switch_create()
+{
+	ip link add name br1 type bridge vlan_filtering 0 mcast_snooping 0
+	# Make sure the bridge uses the MAC address of the local port and not
+	# that of the VxLAN's device.
+	ip link set dev br1 address $(mac_get $swp1)
+	ip link set dev br1 up
+
+	ip link set dev $rp1 up
+	rp1_set_addr
+	tc qdisc add dev $rp1 clsact
+
+	ip link add name vx1 type vxlan id 1000	local 2001:db8:3::1 \
+		dstport "$VXPORT" nolearning udp6zerocsumrx udp6zerocsumtx \
+		tos inherit ttl 100
+	ip link set dev vx1 up
+
+	ip link set dev vx1 master br1
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+	tc qdisc add dev $swp1 clsact
+
+	ip link set dev $swp2 master br1
+	ip link set dev $swp2 up
+
+	bridge fdb append dev vx1 00:00:00:00:00:00 dst 2001:db8:4::1 self
+	bridge fdb append dev vx1 00:00:00:00:00:00 dst 2001:db8:5::1 self
+}
+
+switch_destroy()
+{
+	bridge fdb del dev vx1 00:00:00:00:00:00 dst 2001:db8:5::1 self
+	bridge fdb del dev vx1 00:00:00:00:00:00 dst 2001:db8:4::1 self
+
+	ip link set dev $swp2 down
+	ip link set dev $swp2 nomaster
+
+	tc qdisc del dev $swp1 clsact
+	ip link set dev $swp1 down
+	ip link set dev $swp1 nomaster
+
+	ip link set dev vx1 nomaster
+	ip link set dev vx1 down
+	ip link del dev vx1
+
+	tc qdisc del dev $rp1 clsact
+	rp1_unset_addr
+	ip link set dev $rp1 down
+
+	ip link set dev br1 down
+	ip link del dev br1
+}
+
+vrp2_create()
+{
+	simple_if_init $rp2 2001:db8:3::2/64
+	__simple_if_init v1 v$rp2 2001:db8:4::2/64
+	__simple_if_init v3 v$rp2 2001:db8:5::2/64
+	tc qdisc add dev v1 clsact
+}
+
+vrp2_destroy()
+{
+	tc qdisc del dev v1 clsact
+	__simple_if_fini v3 2001:db8:5::2/64
+	__simple_if_fini v1 2001:db8:4::2/64
+	simple_if_fini $rp2 2001:db8:3::2/64
+}
+
+ns_init_common()
+{
+	local in_if=$1; shift
+	local in_addr=$1; shift
+	local other_in_addr=$1; shift
+	local nh_addr=$1; shift
+	local host_addr_ipv4=$1; shift
+	local host_addr_ipv6=$1; shift
+
+	ip link set dev $in_if up
+	ip address add dev $in_if $in_addr/64
+	tc qdisc add dev $in_if clsact
+
+	ip link add name br2 type bridge vlan_filtering 0
+	ip link set dev br2 up
+
+	ip link add name w1 type veth peer name w2
+
+	ip link set dev w1 master br2
+	ip link set dev w1 up
+
+	ip link add name vx2 type vxlan id 1000 local $in_addr \
+		dstport "$VXPORT" udp6zerocsumrx
+	ip link set dev vx2 up
+	bridge fdb append dev vx2 00:00:00:00:00:00 dst 2001:db8:3::1 self
+	bridge fdb append dev vx2 00:00:00:00:00:00 dst $other_in_addr self
+
+	ip link set dev vx2 master br2
+	tc qdisc add dev vx2 clsact
+
+	simple_if_init w2 $host_addr_ipv4/28 $host_addr_ipv6/64
+
+	ip route add 2001:db8:3::0/64 nexthop via $nh_addr
+	ip route add $other_in_addr/128 nexthop via $nh_addr
+}
+export -f ns_init_common
+
+ns1_create()
+{
+	ip netns add ns1
+	ip link set dev v2 netns ns1
+	in_ns ns1 \
+	      ns_init_common v2 2001:db8:4::1 2001:db8:5::1 2001:db8:4::2 \
+	      192.0.2.3 2001:db8:1::3
+}
+
+ns1_destroy()
+{
+	ip netns exec ns1 ip link set dev v2 netns 1
+	ip netns del ns1
+}
+
+ns2_create()
+{
+	ip netns add ns2
+	ip link set dev v4 netns ns2
+	in_ns ns2 \
+	      ns_init_common v4 2001:db8:5::1 2001:db8:4::1 2001:db8:5::2 \
+	      192.0.2.4 2001:db8:1::4
+}
+
+ns2_destroy()
+{
+	ip netns exec ns2 ip link set dev v4 netns 1
+	ip netns del ns2
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	rp1=${NETIFS[p5]}
+	rp2=${NETIFS[p6]}
+
+	vrf_prepare
+	forwarding_enable
+
+	h1_create
+	h2_create
+	switch_create
+
+	ip link add name v1 type veth peer name v2
+	ip link add name v3 type veth peer name v4
+	vrp2_create
+	ns1_create
+	ns2_create
+
+	r1_mac=$(in_ns ns1 mac_get w2)
+	r2_mac=$(in_ns ns2 mac_get w2)
+	h2_mac=$(mac_get $h2)
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ns2_destroy
+	ns1_destroy
+	vrp2_destroy
+	ip link del dev v3
+	ip link del dev v1
+
+	switch_destroy
+	h2_destroy
+	h1_destroy
+
+	forwarding_restore
+	vrf_cleanup
+}
+
+# For the first round of tests, vx1 is the first device to get
+# attached to the bridge, and at that point the local IP is already
+# configured. Try the other scenario of attaching the devices to a an
+# already-offloaded bridge, and only then assign the local IP.
+reapply_config()
+{
+	log_info "Reapplying configuration"
+
+	bridge fdb del dev vx1 00:00:00:00:00:00 dst 2001:db8:5::1 self
+	bridge fdb del dev vx1 00:00:00:00:00:00 dst 2001:db8:4::1 self
+	ip link set dev vx1 nomaster
+	rp1_unset_addr
+	sleep 5
+
+	ip link set dev vx1 master br1
+	bridge fdb append dev vx1 00:00:00:00:00:00 dst 2001:db8:4::1 self
+	bridge fdb append dev vx1 00:00:00:00:00:00 dst 2001:db8:5::1 self
+	sleep 1
+	rp1_set_addr
+	sleep 5
+}
+
+__ping_ipv4()
+{
+	local vxlan_local_ip=$1; shift
+	local vxlan_remote_ip=$1; shift
+	local src_ip=$1; shift
+	local dst_ip=$1; shift
+	local dev=$1; shift
+	local info=$1; shift
+
+	RET=0
+
+	tc filter add dev $rp1 egress protocol ipv6 pref 1 handle 101 \
+		flower ip_proto udp src_ip $vxlan_local_ip \
+		dst_ip $vxlan_remote_ip dst_port $VXPORT $TC_FLAG action pass
+	# Match ICMP-reply packets after decapsulation, so source IP is
+	# destination IP of the ping and destination IP is source IP of the
+	# ping.
+	tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \
+		flower src_ip $dst_ip dst_ip $src_ip \
+		$TC_FLAG action pass
+
+	# Send 100 packets and verify that at least 100 packets hit the rule,
+	# to overcome ARP noise.
+	PING_COUNT=100 PING_TIMEOUT=20 ping_do $dev $dst_ip
+	check_err $? "Ping failed"
+
+	tc_check_at_least_x_packets "dev $rp1 egress" 101 10 100
+	check_err $? "Encapsulated packets did not go through router"
+
+	tc_check_at_least_x_packets "dev $swp1 egress" 101 10 100
+	check_err $? "Decapsulated packets did not go through switch"
+
+	log_test "ping: $info"
+
+	tc filter del dev $swp1 egress
+	tc filter del dev $rp1 egress
+}
+
+ping_ipv4()
+{
+	RET=0
+
+	local local_sw_ip=2001:db8:3::1
+	local remote_ns1_ip=2001:db8:4::1
+	local remote_ns2_ip=2001:db8:5::1
+	local h1_ip=192.0.2.1
+	local w2_ns1_ip=192.0.2.3
+	local w2_ns2_ip=192.0.2.4
+
+	ping_test $h1 192.0.2.2 ": local->local"
+
+	__ping_ipv4 $local_sw_ip $remote_ns1_ip $h1_ip $w2_ns1_ip $h1 \
+		"local->remote 1"
+	__ping_ipv4 $local_sw_ip $remote_ns2_ip $h1_ip $w2_ns2_ip $h1 \
+		"local->remote 2"
+}
+
+__ping_ipv6()
+{
+	local vxlan_local_ip=$1; shift
+	local vxlan_remote_ip=$1; shift
+	local src_ip=$1; shift
+	local dst_ip=$1; shift
+	local dev=$1; shift
+	local info=$1; shift
+
+	RET=0
+
+	tc filter add dev $rp1 egress protocol ipv6 pref 1 handle 101 \
+		flower ip_proto udp src_ip $vxlan_local_ip \
+		dst_ip $vxlan_remote_ip dst_port $VXPORT $TC_FLAG action pass
+	# Match ICMP-reply packets after decapsulation, so source IP is
+	# destination IP of the ping and destination IP is source IP of the
+	# ping.
+	tc filter add dev $swp1 egress protocol ipv6 pref 1 handle 101 \
+		flower src_ip $dst_ip dst_ip $src_ip $TC_FLAG action pass
+
+	# Send 100 packets and verify that at least 100 packets hit the rule,
+	# to overcome neighbor discovery noise.
+	PING_COUNT=100 PING_TIMEOUT=20 ping6_do $dev $dst_ip
+	check_err $? "Ping failed"
+
+	tc_check_at_least_x_packets "dev $rp1 egress" 101 100
+	check_err $? "Encapsulated packets did not go through router"
+
+	tc_check_at_least_x_packets "dev $swp1 egress" 101 100
+	check_err $? "Decapsulated packets did not go through switch"
+
+	log_test "ping6: $info"
+
+	tc filter del dev $swp1 egress
+	tc filter del dev $rp1 egress
+}
+
+ping_ipv6()
+{
+	RET=0
+
+	local local_sw_ip=2001:db8:3::1
+	local remote_ns1_ip=2001:db8:4::1
+	local remote_ns2_ip=2001:db8:5::1
+	local h1_ip=2001:db8:1::1
+	local w2_ns1_ip=2001:db8:1::3
+	local w2_ns2_ip=2001:db8:1::4
+
+	ping6_test $h1 2001:db8:1::2 ": local->local"
+
+	__ping_ipv6 $local_sw_ip $remote_ns1_ip $h1_ip $w2_ns1_ip $h1 \
+		"local->remote 1"
+	__ping_ipv6 $local_sw_ip $remote_ns2_ip $h1_ip $w2_ns2_ip $h1 \
+		"local->remote 2"
+}
+
+maybe_in_ns()
+{
+	echo ${1:+in_ns} $1
+}
+
+__flood_counter_add_del()
+{
+	local add_del=$1; shift
+	local dst_ip=$1; shift
+	local dev=$1; shift
+	local ns=$1; shift
+
+	# Putting the ICMP capture both to HW and to SW will end up
+	# double-counting the packets that are trapped to slow path, such as for
+	# the unicast test. Adding either skip_hw or skip_sw fixes this problem,
+	# but with skip_hw, the flooded packets are not counted at all, because
+	# those are dropped due to MAC address mismatch; and skip_sw is a no-go
+	# for veth-based topologies.
+	#
+	# So try to install with skip_sw and fall back to skip_sw if that fails.
+
+	$(maybe_in_ns $ns) tc filter $add_del dev "$dev" ingress \
+	   proto ipv6 pref 100 flower dst_ip $dst_ip ip_proto \
+	   icmpv6 skip_sw action pass 2>/dev/null || \
+	$(maybe_in_ns $ns) tc filter $add_del dev "$dev" ingress \
+	   proto ipv6 pref 100 flower dst_ip $dst_ip ip_proto \
+	   icmpv6 skip_hw action pass
+}
+
+flood_counter_install()
+{
+	__flood_counter_add_del add "$@"
+}
+
+flood_counter_uninstall()
+{
+	__flood_counter_add_del del "$@"
+}
+
+flood_fetch_stat()
+{
+	local dev=$1; shift
+	local ns=$1; shift
+
+	$(maybe_in_ns $ns) tc_rule_stats_get $dev 100 ingress
+}
+
+flood_fetch_stats()
+{
+	local counters=("${@}")
+	local counter
+
+	for counter in "${counters[@]}"; do
+		flood_fetch_stat $counter
+	done
+}
+
+vxlan_flood_test()
+{
+	local mac=$1; shift
+	local dst=$1; shift
+	local -a expects=("${@}")
+
+	local -a counters=($h2 "vx2 ns1" "vx2 ns2")
+	local counter
+	local key
+
+	for counter in "${counters[@]}"; do
+		flood_counter_install $dst $counter
+	done
+
+	local -a t0s=($(flood_fetch_stats "${counters[@]}"))
+	$MZ -6 $h1 -c 10 -d 100msec -p 64 -b $mac -B $dst -t icmp6 type=128 -q
+	sleep 1
+	local -a t1s=($(flood_fetch_stats "${counters[@]}"))
+
+	for key in ${!t0s[@]}; do
+		local delta=$((t1s[$key] - t0s[$key]))
+		local expect=${expects[$key]}
+
+		((expect == delta))
+		check_err $? "${counters[$key]}: Expected to capture $expect packets, got $delta."
+	done
+
+	for counter in "${counters[@]}"; do
+		flood_counter_uninstall $dst $counter
+	done
+}
+
+__test_flood()
+{
+	local mac=$1; shift
+	local dst=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	vxlan_flood_test $mac $dst 10 10 10
+
+	log_test "VXLAN: $what"
+}
+
+test_flood()
+{
+	__test_flood de:ad:be:ef:13:37 2001:db8:1::100 "flood"
+}
+
+vxlan_fdb_add_del()
+{
+	local add_del=$1; shift
+	local mac=$1; shift
+	local dev=$1; shift
+	local dst=$1; shift
+
+	bridge fdb $add_del dev $dev $mac self static permanent \
+		${dst:+dst} $dst 2>/dev/null
+	bridge fdb $add_del dev $dev $mac master static 2>/dev/null
+}
+
+__test_unicast()
+{
+	local mac=$1; shift
+	local dst=$1; shift
+	local hit_idx=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	local -a expects=(0 0 0)
+	expects[$hit_idx]=10
+
+	vxlan_flood_test $mac $dst "${expects[@]}"
+
+	log_test "VXLAN: $what"
+}
+
+test_unicast()
+{
+	local -a targets=("$h2_mac $h2"
+			  "$r1_mac vx1 2001:db8:4::1"
+			  "$r2_mac vx1 2001:db8:5::1")
+	local target
+
+	for target in "${targets[@]}"; do
+		vxlan_fdb_add_del add $target
+	done
+
+	__test_unicast $h2_mac 2001:db8:1::2 0 "local MAC unicast"
+	__test_unicast $r1_mac 2001:db8:1::3 1 "remote MAC 1 unicast"
+	__test_unicast $r2_mac 2001:db8:1::4 2 "remote MAC 2 unicast"
+
+	for target in "${targets[@]}"; do
+		vxlan_fdb_add_del del $target
+	done
+}
+
+vxlan_ping_test()
+{
+	local ping_dev=$1; shift
+	local ping_dip=$1; shift
+	local ping_args=$1; shift
+	local capture_dev=$1; shift
+	local capture_dir=$1; shift
+	local capture_pref=$1; shift
+	local expect=$1; shift
+
+	local t0=$(tc_rule_stats_get $capture_dev $capture_pref $capture_dir)
+	ping6_do $ping_dev $ping_dip "$ping_args"
+	local t1=$(tc_rule_stats_get $capture_dev $capture_pref $capture_dir)
+	local delta=$((t1 - t0))
+
+	# Tolerate a couple stray extra packets.
+	((expect <= delta && delta <= expect + 5))
+	check_err $? "$capture_dev: Expected to capture $expect packets, got $delta."
+}
+
+test_ttl()
+{
+	RET=0
+
+	tc filter add dev v1 egress pref 77 protocol ipv6 \
+		flower ip_ttl 99 action pass
+	vxlan_ping_test $h1 2001:db8:1::3 "" v1 egress 77 10
+	tc filter del dev v1 egress pref 77 protocol ipv6
+
+	log_test "VXLAN: envelope TTL"
+}
+
+test_tos()
+{
+	RET=0
+
+	tc filter add dev v1 egress pref 77 protocol ipv6 \
+		flower ip_tos 0x14 action pass
+	vxlan_ping_test $h1 2001:db8:1::3 "-Q 0x14" v1 egress 77 10
+	vxlan_ping_test $h1 2001:db8:1::3 "-Q 0x18" v1 egress 77 0
+	tc filter del dev v1 egress pref 77 protocol ipv6
+
+	log_test "VXLAN: envelope TOS inheritance"
+}
+
+__test_ecn_encap()
+{
+	local q=$1; shift
+	local tos=$1; shift
+
+	RET=0
+
+	tc filter add dev v1 egress pref 77 protocol ipv6 \
+		flower ip_tos $tos ip_proto udp dst_port $VXPORT action pass
+	sleep 1
+	vxlan_ping_test $h1 2001:db8:1::3 "-Q $q" v1 egress 77 10
+	tc filter del dev v1 egress pref 77 protocol ipv6
+
+	log_test "VXLAN: ECN encap: $q->$tos"
+}
+
+test_ecn_encap()
+{
+	# In accordance with INET_ECN_encapsulate()
+	__test_ecn_encap 0x00 0x00
+	__test_ecn_encap 0x01 0x01
+	__test_ecn_encap 0x02 0x02
+	__test_ecn_encap 0x03 0x02
+}
+
+vxlan_encapped_ping_do()
+{
+	local count=$1; shift
+	local dev=$1; shift
+	local next_hop_mac=$1; shift
+	local dest_ip=$1; shift
+	local dest_mac=$1; shift
+	local inner_tos=$1; shift
+	local outer_tos=$1; shift
+	local saddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:03"
+	local daddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01"
+
+	$MZ -6 $dev -c $count -d 100msec -q \
+		-b $next_hop_mac -B $dest_ip \
+		-t udp tos=$outer_tos,sp=23456,dp=$VXPORT,p=$(:
+		    )"08:"$(                      : VXLAN flags
+		    )"00:00:00:"$(                : VXLAN reserved
+		    )"00:03:e8:"$(                : VXLAN VNI
+		    )"00:"$(                      : VXLAN reserved
+		    )"$dest_mac:"$(               : ETH daddr
+		    )"$(mac_get w2):"$(           : ETH saddr
+		    )"86:dd:"$(                   : ETH type
+		    )"6"$(			  : IP version
+		    )"$inner_tos"$(               : Traffic class
+		    )"0:00:00:"$(                 : Flow label
+		    )"00:08:"$(                   : Payload length
+		    )"3a:"$(                      : Next header
+		    )"04:"$(                      : Hop limit
+		    )"$saddr:"$(		  : IP saddr
+		    )"$daddr:"$(		  : IP daddr
+		    )"80:"$(			  : ICMPv6.type
+		    )"00:"$(			  : ICMPv6.code
+		    )"00:"$(			  : ICMPv6.checksum
+		    )
+}
+export -f vxlan_encapped_ping_do
+
+vxlan_encapped_ping_test()
+{
+	local ping_dev=$1; shift
+	local nh_dev=$1; shift
+	local ping_dip=$1; shift
+	local inner_tos=$1; shift
+	local outer_tos=$1; shift
+	local stat_get=$1; shift
+	local expect=$1; shift
+
+	local t0=$($stat_get)
+
+	in_ns ns1 \
+		vxlan_encapped_ping_do 10 $ping_dev $(mac_get $nh_dev) \
+			$ping_dip $(mac_get $h1) \
+			$inner_tos $outer_tos
+	sleep 1
+	local t1=$($stat_get)
+	local delta=$((t1 - t0))
+
+	# Tolerate a couple stray extra packets.
+	((expect <= delta && delta <= expect + 2))
+	check_err $? "Expected to capture $expect packets, got $delta."
+}
+export -f vxlan_encapped_ping_test
+
+__test_ecn_decap()
+{
+	local orig_inner_tos=$1; shift
+	local orig_outer_tos=$1; shift
+	local decapped_tos=$1; shift
+
+	RET=0
+
+	tc filter add dev $h1 ingress pref 77 protocol ipv6 \
+		flower src_ip 2001:db8:1::3 dst_ip 2001:db8:1::1 \
+		ip_tos $decapped_tos action drop
+	sleep 1
+	vxlan_encapped_ping_test v2 v1 2001:db8:3::1 \
+				 $orig_inner_tos $orig_outer_tos \
+				 "tc_rule_stats_get $h1 77 ingress" 10
+	tc filter del dev $h1 ingress pref 77
+
+	log_test "VXLAN: ECN decap: $orig_outer_tos/$orig_inner_tos->$decapped_tos"
+}
+
+test_ecn_decap_error()
+{
+	local orig_inner_tos="0:0"
+	local orig_outer_tos=03
+
+	RET=0
+
+	vxlan_encapped_ping_test v2 v1 2001:db8:3::1 \
+				 $orig_inner_tos $orig_outer_tos \
+				 "link_stats_rx_errors_get vx1" 10
+
+	log_test "VXLAN: ECN decap: $orig_outer_tos/$orig_inner_tos->error"
+}
+
+test_ecn_decap()
+{
+	# In accordance with INET_ECN_decapsulate()
+	__test_ecn_decap "0:0" 00 0x00
+	__test_ecn_decap "0:0" 01 0x00
+	__test_ecn_decap "0:0" 02 0x00
+	# 00 03 is tested in test_ecn_decap_error()
+	__test_ecn_decap "0:1" 00 0x01
+	__test_ecn_decap "0:1" 01 0x01
+	__test_ecn_decap "0:1" 02 0x01
+	__test_ecn_decap "0:1" 03 0x03
+	__test_ecn_decap "0:2" 00 0x02
+	__test_ecn_decap "0:2" 01 0x01
+	__test_ecn_decap "0:2" 02 0x02
+	__test_ecn_decap "0:2" 03 0x03
+	__test_ecn_decap "0:3" 00 0x03
+	__test_ecn_decap "0:3" 01 0x03
+	__test_ecn_decap "0:3" 02 0x03
+	__test_ecn_decap "0:3" 03 0x03
+	test_ecn_decap_error
+}
+
+test_all()
+{
+	log_info "Running tests with UDP port $VXPORT"
+	tests_run
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+test_all
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_port_8472_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_port_8472_ipv6.sh
new file mode 100755
index 000000000000..00540317737a
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_port_8472_ipv6.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# A wrapper to run VXLAN tests with an unusual port number.
+
+VXPORT=8472
+ALL_TESTS="
+	ping_ipv4
+	ping_ipv6
+"
+source vxlan_bridge_1d_ipv6.sh
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh
index a5789721ba92..fb9a34cb50c6 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh
@@ -680,26 +680,6 @@ test_pvid()
 	log_test "VXLAN: flood after vlan re-add"
 }
 
-vxlan_ping_test()
-{
-	local ping_dev=$1; shift
-	local ping_dip=$1; shift
-	local ping_args=$1; shift
-	local capture_dev=$1; shift
-	local capture_dir=$1; shift
-	local capture_pref=$1; shift
-	local expect=$1; shift
-
-	local t0=$(tc_rule_stats_get $capture_dev $capture_pref $capture_dir)
-	ping_do $ping_dev $ping_dip "$ping_args"
-	local t1=$(tc_rule_stats_get $capture_dev $capture_pref $capture_dir)
-	local delta=$((t1 - t0))
-
-	# Tolerate a couple stray extra packets.
-	((expect <= delta && delta <= expect + 2))
-	check_err $? "$capture_dev: Expected to capture $expect packets, got $delta."
-}
-
 __test_learning()
 {
 	local -a expects=(0 0 0 0 0)
@@ -770,7 +750,7 @@ __test_learning()
 	expects[0]=0; expects[$idx1]=10; expects[$idx2]=0
 	vxlan_flood_test $mac $dst $vid "${expects[@]}"
 
-	sleep 20
+	sleep 60
 
 	bridge fdb show brport $vx | grep $mac | grep -q self
 	check_fail $?
@@ -816,11 +796,11 @@ test_learning()
 	local dst=192.0.2.100
 	local vid=10
 
-	# Enable learning on the VxLAN devices and set ageing time to 10 seconds
-	ip link set dev br1 type bridge ageing_time 1000
-	ip link set dev vx10 type vxlan ageing 10
+	# Enable learning on the VxLAN devices and set ageing time to 30 seconds
+	ip link set dev br1 type bridge ageing_time 3000
+	ip link set dev vx10 type vxlan ageing 30
 	ip link set dev vx10 type vxlan learning
-	ip link set dev vx20 type vxlan ageing 10
+	ip link set dev vx20 type vxlan ageing 30
 	ip link set dev vx20 type vxlan learning
 	reapply_config
 
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_ipv6.sh
new file mode 100755
index 000000000000..e83fde79f40d
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_ipv6.sh
@@ -0,0 +1,837 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------+                          +------------------------+
+# | H1 (vrf)              |                          | H2 (vrf)               |
+# | + $h1.10              |                          | + $h2.10               |
+# | | 192.0.2.1/28        |                          | | 192.0.2.2/28         |
+# | | 2001:db8:1::1/64    |                          | | 2001:db8:1::2/64     |
+# | |                     |                          | |                      |
+# | |  + $h1.20           |                          | |  + $h2.20            |
+# | \  | 198.51.100.1/24  |                          | \  | 198.51.100.2/24   |
+# |  \ | 2001:db8:2::1/64 |                          |  \ | 2001:db8:2::2/64  |
+# |   \|                  |                          |   \|                   |
+# |    + $h1              |                          |    + $h2               |
+# +----|------------------+                          +----|-------------------+
+#      |                                                  |
+# +----|--------------------------------------------------|-------------------+
+# | SW |                                                  |                   |
+# | +--|--------------------------------------------------|-----------------+ |
+# | |  + $swp1                   BR1 (802.1q)             + $swp2           | |
+# | |     vid 10                                             vid 10         | |
+# | |     vid 20                                             vid 20         | |
+# | |                                                                       | |
+# | |  + vx10 (vxlan)                        + vx20 (vxlan)                 | |
+# | |    local:                                local:                       | |
+# | |    2001:db8:3::1                         2001:db8:3::1                | |
+# | |    remote:                               remote:                      | |
+# | |    2001:db8:4::1 2001:db8:5::1           2001:db8:4::1 2001:db8:5::1  | |
+# | |    id 1000 dstport $VXPORT               id 2000 dstport $VXPORT      | |
+# | |    vid 10 pvid untagged                  vid 20 pvid untagged         | |
+# | +-----------------------------------------------------------------------+ |
+# |                                                                           |
+# |  2001:db8:4::0/64 via 2001:db8:3::2                                       |
+# |  2001:db8:5::0/64 via 2001:db8:3::2                                       |
+# |                                                                           |
+# |    + $rp1                                                                 |
+# |    | 2001:db8:3::1/64                                                     |
+# +----|----------------------------------------------------------------------+
+#      |
+# +----|----------------------------------------------------------+
+# |    |                                             VRP2 (vrf)   |
+# |    + $rp2                                                     |
+# |      2001:db8:3::2/64                                         |
+# |                                                               |  (maybe) HW
+# =============================================================================
+# |                                                               |  (likely) SW
+# |    + v1 (veth)                             + v3 (veth)        |
+# |    | 2001:db8:4::2/64                      | 2001:db8:5::2/64 |
+# +----|---------------------------------------|------------------+
+#      |                                       |
+# +----|--------------------------------+ +----|-------------------------------+
+# |    + v2 (veth)        NS1 (netns)   | |    + v4 (veth)        NS2 (netns)  |
+# |      2001:db8:4::1/64               | |      2001:db8:5::1/64              |
+# |                                     | |                                    |
+# | 2001:db8:3::0/64 via 2001:db8:4::2  | | 2001:db8:3::0/64 via 2001:db8:5::2 |
+# | 2001:db8:5::1/128 via 2001:db8:4::2 | | 2001:db8:4::1/128 via              |
+# |                                     | |         2001:db8:5::2              |
+# |                                     | |                                    |
+# | +-------------------------------+   | | +-------------------------------+  |
+# | |                  BR2 (802.1q) |   | | |                  BR2 (802.1q) |  |
+# | |  + vx10 (vxlan)               |   | | |  + vx10 (vxlan)               |  |
+# | |    local 2001:db8:4::1        |   | | |    local 2001:db8:5::1        |  |
+# | |    remote 2001:db8:3::1       |   | | |    remote 2001:db8:3::1       |  |
+# | |    remote 2001:db8:5::1       |   | | |    remote 2001:db8:4::1       |  |
+# | |    id 1000 dstport $VXPORT    |   | | |    id 1000 dstport $VXPORT    |  |
+# | |    vid 10 pvid untagged       |   | | |    vid 10 pvid untagged       |  |
+# | |                               |   | | |                               |  |
+# | |  + vx20 (vxlan)               |   | | |  + vx20 (vxlan)               |  |
+# | |    local 2001:db8:4::1        |   | | |    local 2001:db8:5::1        |  |
+# | |    remote 2001:db8:3::1       |   | | |    remote 2001:db8:3::1       |  |
+# | |    remote 2001:db8:5::1       |   | | |    remote 2001:db8:4::1       |  |
+# | |    id 2000 dstport $VXPORT    |   | | |    id 2000 dstport $VXPORT    |  |
+# | |    vid 20 pvid untagged       |   | | |    vid 20 pvid untagged       |  |
+# | |                               |   | | |                               |  |
+# | |  + w1 (veth)                  |   | | |  + w1 (veth)                  |  |
+# | |  | vid 10                     |   | | |  | vid 10                     |  |
+# | |  | vid 20                     |   | | |  | vid 20                     |  |
+# | +--|----------------------------+   | | +--|----------------------------+  |
+# |    |                                | |    |                               |
+# | +--|----------------------------+   | | +--|----------------------------+  |
+# | |  + w2 (veth)        VW2 (vrf) |   | | |  + w2 (veth)        VW2 (vrf) |  |
+# | |  |\                           |   | | |  |\                           |  |
+# | |  | + w2.10                    |   | | |  | + w2.10                    |  |
+# | |  |   192.0.2.3/28             |   | | |  |   192.0.2.4/28             |  |
+# | |  |   2001:db8:1::3/64         |   | | |  |   2001:db8:1::4/64         |  |
+# | |  |                            |   | | |  |                            |  |
+# | |  + w2.20                      |   | | |  + w2.20                      |  |
+# | |    198.51.100.3/24            |   | | |    198.51.100.4/24            |  |
+# | |    2001:db8:2::3/64           |   | | |    2001:db8:2::4/64           |  |
+# | +-------------------------------+   | | +-------------------------------+  |
+# +-------------------------------------+ +------------------------------------+
+
+: ${VXPORT:=4789}
+export VXPORT
+
+: ${ALL_TESTS:="
+	ping_ipv4
+	ping_ipv6
+	test_flood
+	test_unicast
+	reapply_config
+	ping_ipv4
+	ping_ipv6
+	test_flood
+	test_unicast
+	test_pvid
+	ping_ipv4
+	ping_ipv6
+	test_flood
+	test_pvid
+"}
+
+NUM_NETIFS=6
+source lib.sh
+source tc_common.sh
+
+h1_create()
+{
+	simple_if_init $h1
+	tc qdisc add dev $h1 clsact
+	vlan_create $h1 10 v$h1 192.0.2.1/28 2001:db8:1::1/64
+	vlan_create $h1 20 v$h1 198.51.100.1/24 2001:db8:2::1/64
+}
+
+h1_destroy()
+{
+	vlan_destroy $h1 20
+	vlan_destroy $h1 10
+	tc qdisc del dev $h1 clsact
+	simple_if_fini $h1
+}
+
+h2_create()
+{
+	simple_if_init $h2
+	tc qdisc add dev $h2 clsact
+	vlan_create $h2 10 v$h2 192.0.2.2/28 2001:db8:1::2/64
+	vlan_create $h2 20 v$h2 198.51.100.2/24 2001:db8:2::2/64
+}
+
+h2_destroy()
+{
+	vlan_destroy $h2 20
+	vlan_destroy $h2 10
+	tc qdisc del dev $h2 clsact
+	simple_if_fini $h2
+}
+
+rp1_set_addr()
+{
+	ip address add dev $rp1 2001:db8:3::1/64
+
+	ip route add 2001:db8:4::0/64 nexthop via 2001:db8:3::2
+	ip route add 2001:db8:5::0/64 nexthop via 2001:db8:3::2
+}
+
+rp1_unset_addr()
+{
+	ip route del 2001:db8:5::0/64 nexthop via 2001:db8:3::2
+	ip route del 2001:db8:4::0/64 nexthop via 2001:db8:3::2
+
+	ip address del dev $rp1 2001:db8:3::1/64
+}
+
+switch_create()
+{
+	ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 \
+		mcast_snooping 0
+	# Make sure the bridge uses the MAC address of the local port and not
+	# that of the VxLAN's device.
+	ip link set dev br1 address $(mac_get $swp1)
+	ip link set dev br1 up
+
+	ip link set dev $rp1 up
+	rp1_set_addr
+	tc qdisc add dev $rp1 clsact
+
+	ip link add name vx10 type vxlan id 1000 local 2001:db8:3::1 \
+		dstport "$VXPORT" nolearning udp6zerocsumrx udp6zerocsumtx \
+		tos inherit ttl 100
+	ip link set dev vx10 up
+
+	ip link set dev vx10 master br1
+	bridge vlan add vid 10 dev vx10 pvid untagged
+
+	ip link add name vx20 type vxlan id 2000 local 2001:db8:3::1 \
+		dstport "$VXPORT" nolearning udp6zerocsumrx udp6zerocsumtx \
+		tos inherit ttl 100
+	ip link set dev vx20 up
+
+	ip link set dev vx20 master br1
+	bridge vlan add vid 20 dev vx20 pvid untagged
+
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+	tc qdisc add dev $swp1 clsact
+	bridge vlan add vid 10 dev $swp1
+	bridge vlan add vid 20 dev $swp1
+
+	ip link set dev $swp2 master br1
+	ip link set dev $swp2 up
+	bridge vlan add vid 10 dev $swp2
+	bridge vlan add vid 20 dev $swp2
+
+	bridge fdb append dev vx10 00:00:00:00:00:00 dst 2001:db8:4::1 self
+	bridge fdb append dev vx10 00:00:00:00:00:00 dst 2001:db8:5::1 self
+
+	bridge fdb append dev vx20 00:00:00:00:00:00 dst 2001:db8:4::1 self
+	bridge fdb append dev vx20 00:00:00:00:00:00 dst 2001:db8:5::1 self
+}
+
+switch_destroy()
+{
+	bridge fdb del dev vx20 00:00:00:00:00:00 dst 2001:db8:5::1 self
+	bridge fdb del dev vx20 00:00:00:00:00:00 dst 2001:db8:4::1 self
+
+	bridge fdb del dev vx10 00:00:00:00:00:00 dst 2001:db8:5::1 self
+	bridge fdb del dev vx10 00:00:00:00:00:00 dst 2001:db8:4::1 self
+
+	bridge vlan del vid 20 dev $swp2
+	bridge vlan del vid 10 dev $swp2
+	ip link set dev $swp2 down
+	ip link set dev $swp2 nomaster
+
+	bridge vlan del vid 20 dev $swp1
+	bridge vlan del vid 10 dev $swp1
+	tc qdisc del dev $swp1 clsact
+	ip link set dev $swp1 down
+	ip link set dev $swp1 nomaster
+
+	bridge vlan del vid 20 dev vx20
+	ip link set dev vx20 nomaster
+
+	ip link set dev vx20 down
+	ip link del dev vx20
+
+	bridge vlan del vid 10 dev vx10
+	ip link set dev vx10 nomaster
+
+	ip link set dev vx10 down
+	ip link del dev vx10
+
+	tc qdisc del dev $rp1 clsact
+	rp1_unset_addr
+	ip link set dev $rp1 down
+
+	ip link set dev br1 down
+	ip link del dev br1
+}
+
+vrp2_create()
+{
+	simple_if_init $rp2 2001:db8:3::2/64
+	__simple_if_init v1 v$rp2 2001:db8:4::2/64
+	__simple_if_init v3 v$rp2 2001:db8:5::2/64
+	tc qdisc add dev v1 clsact
+}
+
+vrp2_destroy()
+{
+	tc qdisc del dev v1 clsact
+	__simple_if_fini v3 2001:db8:5::2/64
+	__simple_if_fini v1 2001:db8:4::2/64
+	simple_if_fini $rp2 2001:db8:3::2/64
+}
+
+ns_init_common()
+{
+	local in_if=$1; shift
+	local in_addr=$1; shift
+	local other_in_addr=$1; shift
+	local nh_addr=$1; shift
+	local host_addr1_ipv4=$1; shift
+	local host_addr1_ipv6=$1; shift
+	local host_addr2_ipv4=$1; shift
+	local host_addr2_ipv6=$1; shift
+
+	ip link set dev $in_if up
+	ip address add dev $in_if $in_addr/64
+	tc qdisc add dev $in_if clsact
+
+	ip link add name br2 type bridge vlan_filtering 1 vlan_default_pvid 0
+	ip link set dev br2 up
+
+	ip link add name w1 type veth peer name w2
+
+	ip link set dev w1 master br2
+	ip link set dev w1 up
+
+	bridge vlan add vid 10 dev w1
+	bridge vlan add vid 20 dev w1
+
+	ip link add name vx10 type vxlan id 1000 local $in_addr \
+		dstport "$VXPORT" udp6zerocsumrx
+	ip link set dev vx10 up
+	bridge fdb append dev vx10 00:00:00:00:00:00 dst 2001:db8:3::1 self
+	bridge fdb append dev vx10 00:00:00:00:00:00 dst $other_in_addr self
+
+	ip link set dev vx10 master br2
+	tc qdisc add dev vx10 clsact
+
+	bridge vlan add vid 10 dev vx10 pvid untagged
+
+	ip link add name vx20 type vxlan id 2000 local $in_addr \
+		dstport "$VXPORT" udp6zerocsumrx
+	ip link set dev vx20 up
+	bridge fdb append dev vx20 00:00:00:00:00:00 dst 2001:db8:3::1 self
+	bridge fdb append dev vx20 00:00:00:00:00:00 dst $other_in_addr self
+
+	ip link set dev vx20 master br2
+	tc qdisc add dev vx20 clsact
+
+	bridge vlan add vid 20 dev vx20 pvid untagged
+
+	simple_if_init w2
+        vlan_create w2 10 vw2 $host_addr1_ipv4/28 $host_addr1_ipv6/64
+        vlan_create w2 20 vw2 $host_addr2_ipv4/24 $host_addr2_ipv6/64
+
+	ip route add 2001:db8:3::0/64 nexthop via $nh_addr
+	ip route add $other_in_addr/128 nexthop via $nh_addr
+}
+export -f ns_init_common
+
+ns1_create()
+{
+	ip netns add ns1
+	ip link set dev v2 netns ns1
+	in_ns ns1 \
+	      ns_init_common v2 2001:db8:4::1 2001:db8:5::1 2001:db8:4::2 \
+	      192.0.2.3 2001:db8:1::3 198.51.100.3 2001:db8:2::3
+}
+
+ns1_destroy()
+{
+	ip netns exec ns1 ip link set dev v2 netns 1
+	ip netns del ns1
+}
+
+ns2_create()
+{
+	ip netns add ns2
+	ip link set dev v4 netns ns2
+	in_ns ns2 \
+	      ns_init_common v4 2001:db8:5::1 2001:db8:4::1 2001:db8:5::2 \
+	      192.0.2.4 2001:db8:1::4 198.51.100.4 2001:db8:2::4
+}
+
+ns2_destroy()
+{
+	ip netns exec ns2 ip link set dev v4 netns 1
+	ip netns del ns2
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	rp1=${NETIFS[p5]}
+	rp2=${NETIFS[p6]}
+
+	vrf_prepare
+	forwarding_enable
+
+	h1_create
+	h2_create
+	switch_create
+
+	ip link add name v1 type veth peer name v2
+	ip link add name v3 type veth peer name v4
+	vrp2_create
+	ns1_create
+	ns2_create
+
+	r1_mac=$(in_ns ns1 mac_get w2)
+	r2_mac=$(in_ns ns2 mac_get w2)
+	h2_mac=$(mac_get $h2)
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ns2_destroy
+	ns1_destroy
+	vrp2_destroy
+	ip link del dev v3
+	ip link del dev v1
+
+	switch_destroy
+	h2_destroy
+	h1_destroy
+
+	forwarding_restore
+	vrf_cleanup
+}
+
+# For the first round of tests, vx10 and vx20 were the first devices to get
+# attached to the bridge, and at that point the local IP is already
+# configured. Try the other scenario of attaching these devices to a bridge
+# that already has local ports members, and only then assign the local IP.
+reapply_config()
+{
+	log_info "Reapplying configuration"
+
+	bridge fdb del dev vx20 00:00:00:00:00:00 dst 2001:db8:5::1 self
+	bridge fdb del dev vx20 00:00:00:00:00:00 dst 2001:db8:4::1 self
+
+	bridge fdb del dev vx10 00:00:00:00:00:00 dst 2001:db8:5::1 self
+	bridge fdb del dev vx10 00:00:00:00:00:00 dst 2001:db8:4::1 self
+
+	ip link set dev vx20 nomaster
+	ip link set dev vx10 nomaster
+
+	rp1_unset_addr
+	sleep 5
+
+	ip link set dev vx10 master br1
+	bridge vlan add vid 10 dev vx10 pvid untagged
+
+	ip link set dev vx20 master br1
+	bridge vlan add vid 20 dev vx20 pvid untagged
+
+	bridge fdb append dev vx10 00:00:00:00:00:00 dst 2001:db8:4::1 self
+	bridge fdb append dev vx10 00:00:00:00:00:00 dst 2001:db8:5::1 self
+
+	bridge fdb append dev vx20 00:00:00:00:00:00 dst 2001:db8:4::1 self
+	bridge fdb append dev vx20 00:00:00:00:00:00 dst 2001:db8:5::1 self
+
+	rp1_set_addr
+	sleep 5
+}
+
+__ping_ipv4()
+{
+	local vxlan_local_ip=$1; shift
+	local vxlan_remote_ip=$1; shift
+	local src_ip=$1; shift
+	local dst_ip=$1; shift
+	local dev=$1; shift
+	local info=$1; shift
+
+	RET=0
+
+	tc filter add dev $rp1 egress protocol ipv6 pref 1 handle 101 \
+		flower ip_proto udp src_ip $vxlan_local_ip \
+		dst_ip $vxlan_remote_ip dst_port $VXPORT $TC_FLAG action pass
+	# Match ICMP-reply packets after decapsulation, so source IP is
+	# destination IP of the ping and destination IP is source IP of the
+	# ping.
+	tc filter add dev $swp1 egress protocol 802.1q pref 1 handle 101 \
+		flower vlan_ethtype ipv4 src_ip $dst_ip dst_ip $src_ip \
+		$TC_FLAG action pass
+
+	# Send 100 packets and verify that at least 100 packets hit the rule,
+	# to overcome ARP noise.
+	PING_COUNT=100 PING_TIMEOUT=20 ping_do $dev $dst_ip
+	check_err $? "Ping failed"
+
+	tc_check_at_least_x_packets "dev $rp1 egress" 101 10 100
+	check_err $? "Encapsulated packets did not go through router"
+
+	tc_check_at_least_x_packets "dev $swp1 egress" 101 10 100
+	check_err $? "Decapsulated packets did not go through switch"
+
+	log_test "ping: $info"
+
+	tc filter del dev $swp1 egress
+	tc filter del dev $rp1 egress
+}
+
+ping_ipv4()
+{
+	RET=0
+
+	local local_sw_ip=2001:db8:3::1
+	local remote_ns1_ip=2001:db8:4::1
+	local remote_ns2_ip=2001:db8:5::1
+	local h1_10_ip=192.0.2.1
+	local h1_20_ip=198.51.100.1
+	local w2_10_ns1_ip=192.0.2.3
+	local w2_10_ns2_ip=192.0.2.4
+	local w2_20_ns1_ip=198.51.100.3
+	local w2_20_ns2_ip=198.51.100.4
+
+	ping_test $h1.10 192.0.2.2 ": local->local vid 10"
+	ping_test $h1.20 198.51.100.2 ": local->local vid 20"
+
+	__ping_ipv4 $local_sw_ip $remote_ns1_ip $h1_10_ip $w2_10_ns1_ip $h1.10 \
+		"local->remote 1 vid 10"
+	__ping_ipv4 $local_sw_ip $remote_ns2_ip $h1_10_ip $w2_10_ns2_ip $h1.10 \
+		"local->remote 2 vid 10"
+	__ping_ipv4 $local_sw_ip $remote_ns1_ip $h1_20_ip $w2_20_ns1_ip $h1.20 \
+		"local->remote 1 vid 20"
+	__ping_ipv4 $local_sw_ip $remote_ns2_ip $h1_20_ip $w2_20_ns2_ip $h1.20 \
+		"local->remote 2 vid 20"
+}
+
+__ping_ipv6()
+{
+	local vxlan_local_ip=$1; shift
+	local vxlan_remote_ip=$1; shift
+	local src_ip=$1; shift
+	local dst_ip=$1; shift
+	local dev=$1; shift
+	local info=$1; shift
+
+	RET=0
+
+	tc filter add dev $rp1 egress protocol ipv6 pref 1 handle 101 \
+		flower ip_proto udp src_ip $vxlan_local_ip \
+		dst_ip $vxlan_remote_ip dst_port $VXPORT $TC_FLAG action pass
+	# Match ICMP-reply packets after decapsulation, so source IP is
+	# destination IP of the ping and destination IP is source IP of the
+	# ping.
+	tc filter add dev $swp1 egress protocol 802.1q pref 1 handle 101 \
+		flower vlan_ethtype ipv6 src_ip $dst_ip dst_ip $src_ip \
+		$TC_FLAG action pass
+
+	# Send 100 packets and verify that at least 100 packets hit the rule,
+	# to overcome neighbor discovery noise.
+	PING_COUNT=100 PING_TIMEOUT=20 ping6_do $dev $dst_ip
+	check_err $? "Ping failed"
+
+	tc_check_at_least_x_packets "dev $rp1 egress" 101 100
+	check_err $? "Encapsulated packets did not go through router"
+
+	tc_check_at_least_x_packets "dev $swp1 egress" 101 100
+	check_err $? "Decapsulated packets did not go through switch"
+
+	log_test "ping6: $info"
+
+	tc filter del dev $swp1 egress
+	tc filter del dev $rp1 egress
+}
+
+ping_ipv6()
+{
+	RET=0
+
+	local local_sw_ip=2001:db8:3::1
+	local remote_ns1_ip=2001:db8:4::1
+	local remote_ns2_ip=2001:db8:5::1
+	local h1_10_ip=2001:db8:1::1
+	local h1_20_ip=2001:db8:2::1
+	local w2_10_ns1_ip=2001:db8:1::3
+	local w2_10_ns2_ip=2001:db8:1::4
+	local w2_20_ns1_ip=2001:db8:2::3
+	local w2_20_ns2_ip=2001:db8:2::4
+
+	ping6_test $h1.10 2001:db8:1::2 ": local->local vid 10"
+	ping6_test $h1.20 2001:db8:2::2 ": local->local vid 20"
+
+	__ping_ipv6 $local_sw_ip $remote_ns1_ip $h1_10_ip $w2_10_ns1_ip $h1.10 \
+		"local->remote 1 vid 10"
+	__ping_ipv6 $local_sw_ip $remote_ns2_ip $h1_10_ip $w2_10_ns2_ip $h1.10 \
+		"local->remote 2 vid 10"
+	__ping_ipv6 $local_sw_ip $remote_ns1_ip $h1_20_ip $w2_20_ns1_ip $h1.20 \
+		"local->remote 1 vid 20"
+	__ping_ipv6 $local_sw_ip $remote_ns2_ip $h1_20_ip $w2_20_ns2_ip $h1.20 \
+		"local->remote 2 vid 20"
+}
+
+maybe_in_ns()
+{
+	echo ${1:+in_ns} $1
+}
+
+__flood_counter_add_del()
+{
+	local add_del=$1; shift
+	local dst_ip=$1; shift
+	local dev=$1; shift
+	local ns=$1; shift
+
+	# Putting the ICMP capture both to HW and to SW will end up
+	# double-counting the packets that are trapped to slow path, such as for
+	# the unicast test. Adding either skip_hw or skip_sw fixes this problem,
+	# but with skip_hw, the flooded packets are not counted at all, because
+	# those are dropped due to MAC address mismatch; and skip_sw is a no-go
+	# for veth-based topologies.
+	#
+	# So try to install with skip_sw and fall back to skip_sw if that fails.
+
+	$(maybe_in_ns $ns) tc filter $add_del dev "$dev" ingress \
+	   proto ipv6 pref 100 flower dst_ip $dst_ip ip_proto \
+	   icmpv6 skip_sw action pass 2>/dev/null || \
+	$(maybe_in_ns $ns) tc filter $add_del dev "$dev" ingress \
+	   proto ipv6 pref 100 flower dst_ip $dst_ip ip_proto \
+	   icmpv6 skip_hw action pass
+}
+
+flood_counter_install()
+{
+	__flood_counter_add_del add "$@"
+}
+
+flood_counter_uninstall()
+{
+	__flood_counter_add_del del "$@"
+}
+
+flood_fetch_stat()
+{
+	local dev=$1; shift
+	local ns=$1; shift
+
+	$(maybe_in_ns $ns) tc_rule_stats_get $dev 100 ingress
+}
+
+flood_fetch_stats()
+{
+	local counters=("${@}")
+	local counter
+
+	for counter in "${counters[@]}"; do
+		flood_fetch_stat $counter
+	done
+}
+
+vxlan_flood_test()
+{
+	local mac=$1; shift
+	local dst=$1; shift
+	local vid=$1; shift
+	local -a expects=("${@}")
+
+	local -a counters=($h2 "vx10 ns1" "vx20 ns1" "vx10 ns2" "vx20 ns2")
+	local counter
+	local key
+
+	# Packets reach the local host tagged whereas they reach the VxLAN
+	# devices untagged. In order to be able to use the same filter for
+	# all counters, make sure the packets also reach the local host
+	# untagged
+	bridge vlan add vid $vid dev $swp2 untagged
+	for counter in "${counters[@]}"; do
+		flood_counter_install $dst $counter
+	done
+
+	local -a t0s=($(flood_fetch_stats "${counters[@]}"))
+	$MZ -6 $h1 -Q $vid -c 10 -d 100msec -p 64 -b $mac -B $dst -t icmp6 type=128 -q
+	sleep 1
+	local -a t1s=($(flood_fetch_stats "${counters[@]}"))
+
+	for key in ${!t0s[@]}; do
+		local delta=$((t1s[$key] - t0s[$key]))
+		local expect=${expects[$key]}
+
+		((expect == delta))
+		check_err $? "${counters[$key]}: Expected to capture $expect packets, got $delta."
+	done
+
+	for counter in "${counters[@]}"; do
+		flood_counter_uninstall $dst $counter
+	done
+	bridge vlan add vid $vid dev $swp2
+}
+
+__test_flood()
+{
+	local mac=$1; shift
+	local dst=$1; shift
+	local vid=$1; shift
+	local what=$1; shift
+	local -a expects=("${@}")
+
+	RET=0
+
+	vxlan_flood_test $mac $dst $vid "${expects[@]}"
+
+	log_test "VXLAN: $what"
+}
+
+test_flood()
+{
+	__test_flood de:ad:be:ef:13:37 2001:db8:1::100 10 "flood vlan 10" \
+		10 10 0 10 0
+	__test_flood ca:fe:be:ef:13:37 2001:db8:2::100 20 "flood vlan 20" \
+		10 0 10 0 10
+}
+
+vxlan_fdb_add_del()
+{
+	local add_del=$1; shift
+	local vid=$1; shift
+	local mac=$1; shift
+	local dev=$1; shift
+	local dst=$1; shift
+
+	bridge fdb $add_del dev $dev $mac self static permanent \
+		${dst:+dst} $dst 2>/dev/null
+	bridge fdb $add_del dev $dev $mac master static vlan $vid 2>/dev/null
+}
+
+__test_unicast()
+{
+	local mac=$1; shift
+	local dst=$1; shift
+	local hit_idx=$1; shift
+	local vid=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	local -a expects=(0 0 0 0 0)
+	expects[$hit_idx]=10
+
+	vxlan_flood_test $mac $dst $vid "${expects[@]}"
+
+	log_test "VXLAN: $what"
+}
+
+test_unicast()
+{
+	local -a targets=("$h2_mac $h2"
+			  "$r1_mac vx10 2001:db8:4::1"
+			  "$r2_mac vx10 2001:db8:5::1")
+	local target
+
+	log_info "unicast vlan 10"
+
+	for target in "${targets[@]}"; do
+		vxlan_fdb_add_del add 10 $target
+	done
+
+	__test_unicast $h2_mac 2001:db8:1::2 0 10 "local MAC unicast"
+	__test_unicast $r1_mac 2001:db8:1::3 1 10 "remote MAC 1 unicast"
+	__test_unicast $r2_mac 2001:db8:1::4 3 10 "remote MAC 2 unicast"
+
+	for target in "${targets[@]}"; do
+		vxlan_fdb_add_del del 10 $target
+	done
+
+	log_info "unicast vlan 20"
+
+	targets=("$h2_mac $h2" "$r1_mac vx20 2001:db8:4::1" \
+		 "$r2_mac vx20 2001:db8:5::1")
+
+	for target in "${targets[@]}"; do
+		vxlan_fdb_add_del add 20 $target
+	done
+
+	__test_unicast $h2_mac 2001:db8:2::2 0 20 "local MAC unicast"
+	__test_unicast $r1_mac 2001:db8:2::3 2 20 "remote MAC 1 unicast"
+	__test_unicast $r2_mac 2001:db8:2::4 4 20 "remote MAC 2 unicast"
+
+	for target in "${targets[@]}"; do
+		vxlan_fdb_add_del del 20 $target
+	done
+}
+
+test_pvid()
+{
+	local -a expects=(0 0 0 0 0)
+	local mac=de:ad:be:ef:13:37
+	local dst=2001:db8:1::100
+	local vid=10
+
+	# Check that flooding works
+	RET=0
+
+	expects[0]=10; expects[1]=10; expects[3]=10
+	vxlan_flood_test $mac $dst $vid "${expects[@]}"
+
+	log_test "VXLAN: flood before pvid off"
+
+	# Toggle PVID off and test that flood to remote hosts does not work
+	RET=0
+
+	bridge vlan add vid 10 dev vx10
+
+	expects[0]=10; expects[1]=0; expects[3]=0
+	vxlan_flood_test $mac $dst $vid "${expects[@]}"
+
+	log_test "VXLAN: flood after pvid off"
+
+	# Toggle PVID on and test that flood to remote hosts does work
+	RET=0
+
+	bridge vlan add vid 10 dev vx10 pvid untagged
+
+	expects[0]=10; expects[1]=10; expects[3]=10
+	vxlan_flood_test $mac $dst $vid "${expects[@]}"
+
+	log_test "VXLAN: flood after pvid on"
+
+	# Add a new VLAN and test that it does not affect flooding
+	RET=0
+
+	bridge vlan add vid 30 dev vx10
+
+	expects[0]=10; expects[1]=10; expects[3]=10
+	vxlan_flood_test $mac $dst $vid "${expects[@]}"
+
+	bridge vlan del vid 30 dev vx10
+
+	log_test "VXLAN: flood after vlan add"
+
+	# Remove currently mapped VLAN and test that flood to remote hosts does
+	# not work
+	RET=0
+
+	bridge vlan del vid 10 dev vx10
+
+	expects[0]=10; expects[1]=0; expects[3]=0
+	vxlan_flood_test $mac $dst $vid "${expects[@]}"
+
+	log_test "VXLAN: flood after vlan delete"
+
+	# Re-add the VLAN and test that flood to remote hosts does work
+	RET=0
+
+	bridge vlan add vid 10 dev vx10 pvid untagged
+
+	expects[0]=10; expects[1]=10; expects[3]=10
+	vxlan_flood_test $mac $dst $vid "${expects[@]}"
+
+	log_test "VXLAN: flood after vlan re-add"
+}
+
+test_all()
+{
+	log_info "Running tests with UDP port $VXPORT"
+	tests_run
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+test_all
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_port_8472_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_port_8472_ipv6.sh
new file mode 100755
index 000000000000..344f43ccb755
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_port_8472_ipv6.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# A wrapper to run VXLAN tests with an unusual port number.
+
+VXPORT=8472
+ALL_TESTS="
+	ping_ipv4
+	ping_ipv6
+"
+source vxlan_bridge_1q_ipv6.sh
diff --git a/tools/testing/selftests/net/forwarding/vxlan_symmetric_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_symmetric_ipv6.sh
new file mode 100755
index 000000000000..904633427fd0
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/vxlan_symmetric_ipv6.sh
@@ -0,0 +1,563 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+
+# +--------------------------------+            +-----------------------------+
+# |                         vrf-h1 |            |                      vrf-h2 |
+# |    + $h1                       |            | + $h2                       |
+# |    | 2001:db8:1::1/64          |            | | 2001:db8:2::1/64          |
+# |    | default via 2001:db8:1::3 |            | | default via 2001:db8:2::3 |
+# +----|---------------------------+            +-|---------------------------+
+#      |                                          |
+# +----|------------------------------------------|---------------------------+
+# | SW |                                          |                           |
+# | +--|------------------------------------------|-------------------------+ |
+# | |  + $swp1                         br1          + $swp2                 | |
+# | |     vid 10 pvid untagged                         vid 20 pvid untagged | |
+# | |                                                                       | |
+# | |  + vx10                                       + vx20                  | |
+# | |    local 2001:db8:3::1                          local 2001:db8:3::1   | |
+# | |    remote 2001:db8:3::2                         remote 2001:db8:3::2  | |
+# | |    id 1010                                      id 1020               | |
+# | |    dstport 4789                                 dstport 4789          | |
+# | |    vid 10 pvid untagged                         vid 20 pvid untagged  | |
+# | |                                                                       | |
+# | |                             + vx4001                                  | |
+# | |                               local 2001:db8:3::1                     | |
+# | |                               remote 2001:db8:3::2                    | |
+# | |                               id 104001                               | |
+# | |                               dstport 4789                            | |
+# | |                               vid 4001 pvid untagged                  | |
+# | |                                                                       | |
+# | +-----------------------------------+-----------------------------------+ |
+# |                                     |                                     |
+# | +-----------------------------------|-----------------------------------+ |
+# | |                                   |                                   | |
+# | |  +--------------------------------+--------------------------------+  | |
+# | |  |                                |                                |  | |
+# | |  + vlan10                         |                         vlan20 +  | |
+# | |  | 2001:db8:1::2/64               |               2001:db8:2::2/64 |  | |
+# | |  |                                |                                |  | |
+# | |  + vlan10-v (macvlan)             +             vlan20-v (macvlan) +  | |
+# | |    2001:db8:1::3/64           vlan4001            2001:db8:2::3/64    | |
+# | |    00:00:5e:00:01:01                             00:00:5e:00:01:01    | |
+# | |                               vrf-green                               | |
+# | +-----------------------------------------------------------------------+ |
+# |                                                                           |
+# |    + $rp1                                       +lo                       |
+# |    | 2001:db8:4::1/64                           2001:db8:3::1             |
+# +----|----------------------------------------------------------------------+
+#      |
+# +----|--------------------------------------------------------+
+# |    |                            vrf-spine                   |
+# |    + $rp2                                                   |
+# |      2001:db8:4::2/64                                       |
+# |                                                             |   (maybe) HW
+# =============================================================================
+# |                                                             |  (likely) SW
+# |                                                             |
+# |    + v1 (veth)                                              |
+# |    | 2001:db8:5::2/64                                       |
+# +----|--------------------------------------------------------+
+#      |
+# +----|----------------------------------------------------------------------+
+# |    + v2 (veth)                                  +lo           NS1 (netns) |
+# |      2001:db8:5::1/64                            2001:db8:3::2/128        |
+# |                                                                           |
+# | +-----------------------------------------------------------------------+ |
+# | |                               vrf-green                               | |
+# | |  + vlan10-v (macvlan)                           vlan20-v (macvlan) +  | |
+# | |  | 2001:db8:1::3/64                               2001:db8:2::3/64 |  | |
+# | |  | 00:00:5e:00:01:01                             00:00:5e:00:01:01 |  | |
+# | |  |                            vlan4001                             |  | |
+# | |  + vlan10                         +                         vlan20 +  | |
+# | |  | 2001:db8:1::3/64               |               2001:db8:2::3/64 |  | |
+# | |  |                                |                                |  | |
+# | |  +--------------------------------+--------------------------------+  | |
+# | |                                   |                                   | |
+# | +-----------------------------------|-----------------------------------+ |
+# |                                     |                                     |
+# | +-----------------------------------+-----------------------------------+ |
+# | |                                                                       | |
+# | |  + vx10                                     + vx20                    | |
+# | |    local 2001:db8:3::2                        local 2001:db8:3::2     | |
+# | |    remote 2001:db8:3::1                       remote 2001:db8:3::1    | |
+# | |    id 1010                                    id 1020                 | |
+# | |    dstport 4789                               dstport 4789            | |
+# | |    vid 10 pvid untagged                       vid 20 pvid untagged    | |
+# | |                                                                       | |
+# | |                             + vx4001                                  | |
+# | |                               local 2001:db8:3::2                     | |
+# | |                               remote 2001:db8:3::1                    | |
+# | |                               id 104001                               | |
+# | |                               dstport 4789                            | |
+# | |                               vid 4001 pvid untagged                  | |
+# | |                                                                       | |
+# | |  + w1 (veth)                                + w3 (veth)               | |
+# | |  | vid 10 pvid untagged          br1        | vid 20 pvid untagged    | |
+# | +--|------------------------------------------|-------------------------+ |
+# |    |                                          |                           |
+# |    |                                          |                           |
+# | +--|----------------------+                +--|-------------------------+ |
+# | |  |               vrf-h1 |                |  |                  vrf-h2 | |
+# | |  + w2 (veth)            |                |  + w4 (veth)               | |
+# | |    2001:db8:1::4/64     |                |    2001:db8:2::4/64        | |
+# | |    default via          |                |    default via             | |
+# | |    2001:db8:1::3/64     |                |    2001:db8:2::3/64        | |
+# | +-------------------------+                +----------------------------+ |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+	ping_ipv6
+"
+NUM_NETIFS=6
+source lib.sh
+
+hx_create()
+{
+	local vrf_name=$1; shift
+	local if_name=$1; shift
+	local ip_addr=$1; shift
+	local gw_ip=$1; shift
+
+	vrf_create $vrf_name
+	ip link set dev $if_name master $vrf_name
+	ip link set dev $vrf_name up
+	ip link set dev $if_name up
+
+	ip address add $ip_addr/64 dev $if_name
+	ip neigh replace $gw_ip lladdr 00:00:5e:00:01:01 nud permanent \
+		dev $if_name
+	ip route add default vrf $vrf_name nexthop via $gw_ip
+}
+export -f hx_create
+
+hx_destroy()
+{
+	local vrf_name=$1; shift
+	local if_name=$1; shift
+	local ip_addr=$1; shift
+	local gw_ip=$1; shift
+
+	ip route del default vrf $vrf_name nexthop via $gw_ip
+	ip neigh del $gw_ip dev $if_name
+	ip address del $ip_addr/64 dev $if_name
+
+	ip link set dev $if_name down
+	vrf_destroy $vrf_name
+}
+
+h1_create()
+{
+	hx_create "vrf-h1" $h1 2001:db8:1::1 2001:db8:1::3
+}
+
+h1_destroy()
+{
+	hx_destroy "vrf-h1" $h1 2001:db8:1::1 2001:db8:1::3
+}
+
+h2_create()
+{
+	hx_create "vrf-h2" $h2 2001:db8:2::1 2001:db8:2::3
+}
+
+h2_destroy()
+{
+	hx_destroy "vrf-h2" $h2 2001:db8:2::1 2001:db8:2::3
+}
+
+switch_create()
+{
+	ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 \
+		mcast_snooping 0
+	# Make sure the bridge uses the MAC address of the local port and not
+	# that of the VxLAN's device.
+	ip link set dev br1 address $(mac_get $swp1)
+	ip link set dev br1 up
+
+	ip link set dev $rp1 up
+	ip address add dev $rp1 2001:db8:4::1/64
+	ip route add 2001:db8:3::2/128 nexthop via 2001:db8:4::2
+
+	ip link add name vx10 type vxlan id 1010		\
+		local 2001:db8:3::1 remote 2001:db8:3::2 dstport 4789	\
+		nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100
+	ip link set dev vx10 up
+
+	ip link set dev vx10 master br1
+	bridge vlan add vid 10 dev vx10 pvid untagged
+
+	ip link add name vx20 type vxlan id 1020		\
+		local 2001:db8:3::1 remote 2001:db8:3::2 dstport 4789	\
+		nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100
+	ip link set dev vx20 up
+
+	ip link set dev vx20 master br1
+	bridge vlan add vid 20 dev vx20 pvid untagged
+
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+
+	ip link set dev $swp2 master br1
+	ip link set dev $swp2 up
+
+	ip link add name vx4001 type vxlan id 104001		\
+		local 2001:db8:3::1 dstport 4789			\
+		nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100
+	ip link set dev vx4001 up
+
+	ip link set dev vx4001 master br1
+	bridge vlan add vid 4001 dev vx4001 pvid untagged
+
+	ip address add 2001:db8:3::1/128 dev lo
+
+	# Create SVIs
+	vrf_create "vrf-green"
+	ip link set dev vrf-green up
+
+	ip link add link br1 name vlan10 up master vrf-green type vlan id 10
+	ip address add 2001:db8:1::2/64 dev vlan10
+	ip link add link vlan10 name vlan10-v up master vrf-green \
+		address 00:00:5e:00:01:01 type macvlan mode private
+	ip address add 2001:db8:1::3/64 dev vlan10-v
+
+	ip link add link br1 name vlan20 up master vrf-green type vlan id 20
+	ip address add 2001:db8:2::2/64 dev vlan20
+	ip link add link vlan20 name vlan20-v up master vrf-green \
+		address 00:00:5e:00:01:01 type macvlan mode private
+	ip address add 2001:db8:2::3/64 dev vlan20-v
+
+	ip link add link br1 name vlan4001 up master vrf-green \
+		type vlan id 4001
+
+	bridge vlan add vid 10 dev br1 self
+	bridge vlan add vid 20 dev br1 self
+	bridge vlan add vid 4001 dev br1 self
+
+	bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10
+	bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20
+
+	bridge vlan add vid 10 dev $swp1 pvid untagged
+	bridge vlan add vid 20 dev $swp2 pvid untagged
+}
+
+switch_destroy()
+{
+	bridge vlan del vid 20 dev br1 self
+	bridge vlan del vid 10 dev br1 self
+
+	bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 20
+	bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 10
+
+	bridge vlan del vid 4001 dev br1 self
+	ip link del dev vlan4001
+
+	ip link del dev vlan20
+
+	ip link del dev vlan10
+
+	vrf_destroy "vrf-green"
+
+	ip address del 2001:db8:3::1/128 dev lo
+
+	bridge vlan del vid 20 dev $swp2
+	ip link set dev $swp2 down
+	ip link set dev $swp2 nomaster
+
+	bridge vlan del vid 10 dev $swp1
+	ip link set dev $swp1 down
+	ip link set dev $swp1 nomaster
+
+	bridge vlan del vid 4001 dev vx4001
+	ip link set dev vx4001 nomaster
+
+	ip link set dev vx4001 down
+	ip link del dev vx4001
+
+	bridge vlan del vid 20 dev vx20
+	ip link set dev vx20 nomaster
+
+	ip link set dev vx20 down
+	ip link del dev vx20
+
+	bridge vlan del vid 10 dev vx10
+	ip link set dev vx10 nomaster
+
+	ip link set dev vx10 down
+	ip link del dev vx10
+
+	ip route del 2001:db8:3::2 nexthop via 2001:db8:4::2
+	ip address del dev $rp1 2001:db8:4::1/64
+	ip link set dev $rp1 down
+
+	ip link set dev br1 down
+	ip link del dev br1
+}
+
+spine_create()
+{
+	vrf_create "vrf-spine"
+	ip link set dev $rp2 master vrf-spine
+	ip link set dev v1 master vrf-spine
+	ip link set dev vrf-spine up
+	ip link set dev $rp2 up
+	ip link set dev v1 up
+
+	ip address add 2001:db8:4::2/64 dev $rp2
+	ip address add 2001:db8:5::2/64 dev v1
+
+	ip route add 2001:db8:3::1/128 vrf vrf-spine nexthop via \
+		2001:db8:4::1
+	ip route add 2001:db8:3::2/128 vrf vrf-spine nexthop via \
+		2001:db8:5::1
+}
+
+spine_destroy()
+{
+	ip route del 2001:db8:3::2/128 vrf vrf-spine nexthop via \
+		2001:db8:5::1
+	ip route del 2001:db8:3::1/128 vrf vrf-spine nexthop via \
+		2001:db8:4::1
+
+	ip address del 2001:db8:5::2/64 dev v1
+	ip address del 2001:db8:4::2/64 dev $rp2
+
+	ip link set dev v1 down
+	ip link set dev $rp2 down
+	vrf_destroy "vrf-spine"
+}
+
+ns_h1_create()
+{
+	hx_create "vrf-h1" w2 2001:db8:1::4 2001:db8:1::3
+}
+export -f ns_h1_create
+
+ns_h2_create()
+{
+	hx_create "vrf-h2" w4 2001:db8:2::4 2001:db8:2::3
+}
+export -f ns_h2_create
+
+ns_switch_create()
+{
+	ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 \
+		mcast_snooping 0
+	ip link set dev br1 up
+
+	ip link set dev v2 up
+	ip address add dev v2 2001:db8:5::1/64
+	ip route add 2001:db8:3::1 nexthop via 2001:db8:5::2
+
+	ip link add name vx10 type vxlan id 1010		\
+		local 2001:db8:3::2 remote 2001:db8:3::1 dstport 4789	\
+		nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100
+	ip link set dev vx10 up
+
+	ip link set dev vx10 master br1
+	bridge vlan add vid 10 dev vx10 pvid untagged
+
+	ip link add name vx20 type vxlan id 1020		\
+		local 2001:db8:3::2 remote 2001:db8:3::1 dstport 4789	\
+		nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100
+	ip link set dev vx20 up
+
+	ip link set dev vx20 master br1
+	bridge vlan add vid 20 dev vx20 pvid untagged
+
+	ip link add name vx4001 type vxlan id 104001		\
+		local 2001:db8:3::2 dstport 4789			\
+		nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100
+	ip link set dev vx4001 up
+
+	ip link set dev vx4001 master br1
+	bridge vlan add vid 4001 dev vx4001 pvid untagged
+
+	ip link set dev w1 master br1
+	ip link set dev w1 up
+	bridge vlan add vid 10 dev w1 pvid untagged
+
+	ip link set dev w3 master br1
+	ip link set dev w3 up
+	bridge vlan add vid 20 dev w3 pvid untagged
+
+	ip address add 2001:db8:3::2/128 dev lo
+
+	# Create SVIs
+	vrf_create "vrf-green"
+	ip link set dev vrf-green up
+
+	ip link add link br1 name vlan10 up master vrf-green type vlan id 10
+	ip address add 2001:db8:1::3/64 dev vlan10
+	ip link add link vlan10 name vlan10-v up master vrf-green \
+		address 00:00:5e:00:01:01 type macvlan mode private
+	ip address add 2001:db8:1::3/64 dev vlan10-v
+
+	ip link add link br1 name vlan20 up master vrf-green type vlan id 20
+	ip address add 2001:db8:2::3/64 dev vlan20
+	ip link add link vlan20 name vlan20-v up master vrf-green \
+		address 00:00:5e:00:01:01 type macvlan mode private
+	ip address add 2001:db8:2::3/64 dev vlan20-v
+
+	ip link add link br1 name vlan4001 up master vrf-green \
+		type vlan id 4001
+
+	bridge vlan add vid 10 dev br1 self
+	bridge vlan add vid 20 dev br1 self
+	bridge vlan add vid 4001 dev br1 self
+
+	bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10
+	bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20
+}
+export -f ns_switch_create
+
+ns_init()
+{
+	ip link add name w1 type veth peer name w2
+	ip link add name w3 type veth peer name w4
+
+	ip link set dev lo up
+
+	ns_h1_create
+	ns_h2_create
+	ns_switch_create
+}
+export -f ns_init
+
+ns1_create()
+{
+	ip netns add ns1
+	ip link set dev v2 netns ns1
+	in_ns ns1 ns_init
+}
+
+ns1_destroy()
+{
+	ip netns exec ns1 ip link set dev v2 netns 1
+	ip netns del ns1
+}
+
+__l2_vni_init()
+{
+	local mac1=$1; shift
+	local mac2=$1; shift
+	local ip1=$1; shift
+	local ip2=$1; shift
+	local dst=$1; shift
+
+	bridge fdb add $mac1 dev vx10 self master extern_learn static \
+		dst $dst vlan 10
+	bridge fdb add $mac2 dev vx20 self master extern_learn static \
+		dst $dst vlan 20
+
+	ip neigh add $ip1 lladdr $mac1 nud noarp dev vlan10 \
+		extern_learn
+	ip neigh add $ip2 lladdr $mac2 nud noarp dev vlan20 \
+		extern_learn
+}
+export -f __l2_vni_init
+
+l2_vni_init()
+{
+	local h1_ns_mac=$(in_ns ns1 mac_get w2)
+	local h2_ns_mac=$(in_ns ns1 mac_get w4)
+	local h1_mac=$(mac_get $h1)
+	local h2_mac=$(mac_get $h2)
+
+	__l2_vni_init $h1_ns_mac $h2_ns_mac 2001:db8:1::4 2001:db8:2::4 \
+	       2001:db8:3::2
+	in_ns ns1 __l2_vni_init $h1_mac $h2_mac 2001:db8:1::1 2001:db8:2::1 \
+	       2001:db8:3::1
+}
+
+__l3_vni_init()
+{
+	local mac=$1; shift
+	local vtep_ip=$1; shift
+	local host1_ip=$1; shift
+	local host2_ip=$1; shift
+
+	bridge fdb add $mac dev vx4001 self master extern_learn static \
+		dst $vtep_ip vlan 4001
+
+	ip neigh add $vtep_ip lladdr $mac nud noarp dev vlan4001 extern_learn
+
+	ip route add $host1_ip/128 vrf vrf-green nexthop via $vtep_ip \
+		dev vlan4001 onlink
+	ip route add $host2_ip/128 vrf vrf-green nexthop via $vtep_ip \
+		dev vlan4001 onlink
+}
+export -f __l3_vni_init
+
+l3_vni_init()
+{
+	local vlan4001_ns_mac=$(in_ns ns1 mac_get vlan4001)
+	local vlan4001_mac=$(mac_get vlan4001)
+
+	__l3_vni_init $vlan4001_ns_mac 2001:db8:3::2 2001:db8:1::4 \
+		2001:db8:2::4
+	in_ns ns1 __l3_vni_init $vlan4001_mac 2001:db8:3::1 2001:db8:1::1 \
+		2001:db8:2::1
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	rp1=${NETIFS[p5]}
+	rp2=${NETIFS[p6]}
+
+	vrf_prepare
+	forwarding_enable
+
+	h1_create
+	h2_create
+	switch_create
+
+	ip link add name v1 type veth peer name v2
+	spine_create
+	ns1_create
+	in_ns ns1 forwarding_enable
+
+	l2_vni_init
+	l3_vni_init
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ns1_destroy
+	spine_destroy
+	ip link del dev v1
+
+	switch_destroy
+	h2_destroy
+	h1_destroy
+
+	forwarding_restore
+	vrf_cleanup
+}
+
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:2::1 ": local->local vid 10->vid 20"
+	ping6_test $h1 2001:db8:1::4 ": local->remote vid 10->vid 10"
+	ping6_test $h2 2001:db8:2::4 ": local->remote vid 20->vid 20"
+	ping6_test $h1 2001:db8:2::4 ": local->remote vid 10->vid 20"
+	ping6_test $h2 2001:db8:1::4 ": local->remote vid 20->vid 10"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/fq_band_pktlimit.sh b/tools/testing/selftests/net/fq_band_pktlimit.sh
new file mode 100755
index 000000000000..977070ed42b3
--- /dev/null
+++ b/tools/testing/selftests/net/fq_band_pktlimit.sh
@@ -0,0 +1,59 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Verify that FQ has a packet limit per band:
+#
+# 1. set the limit to 10 per band
+# 2. send 20 pkts on band A: verify that 10 are queued, 10 dropped
+# 3. send 20 pkts on band A: verify that  0 are queued, 20 dropped
+# 4. send 20 pkts on band B: verify that 10 are queued, 10 dropped
+#
+# Send packets with a delay to ensure that previously sent
+# packets are still queued when later ones are sent.
+# Use SO_TXTIME for this.
+
+die() {
+	echo "$1"
+	exit 1
+}
+
+# run inside private netns
+if [[ $# -eq 0 ]]; then
+	./in_netns.sh "$0" __subprocess
+	exit
+fi
+
+ip link add type dummy
+ip link set dev dummy0 up
+ip -6 addr add fdaa::1/128 dev dummy0
+ip -6 route add fdaa::/64 dev dummy0
+tc qdisc replace dev dummy0 root handle 1: fq quantum 1514 initial_quantum 1514 limit 10
+
+DELAY=400000
+
+./cmsg_sender -6 -p u -d "${DELAY}" -n 20 fdaa::2 8000
+OUT1="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')"
+
+./cmsg_sender -6 -p u -d "${DELAY}" -n 20 fdaa::2 8000
+OUT2="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')"
+
+./cmsg_sender -6 -p u -d "${DELAY}" -n 20 -P 7 fdaa::2 8000
+OUT3="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')"
+
+# Initial stats will report zero sent, as all packets are still
+# queued in FQ. Sleep for at least the delay period and see that
+# twenty are now sent.
+sleep 0.6
+OUT4="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')"
+
+# Log the output after the test
+echo "${OUT1}"
+echo "${OUT2}"
+echo "${OUT3}"
+echo "${OUT4}"
+
+# Test the output for expected values
+echo "${OUT1}" | grep -q '0\ pkt\ (dropped\ 10'  || die "unexpected drop count at 1"
+echo "${OUT2}" | grep -q '0\ pkt\ (dropped\ 30'  || die "unexpected drop count at 2"
+echo "${OUT3}" | grep -q '0\ pkt\ (dropped\ 40'  || die "unexpected drop count at 3"
+echo "${OUT4}" | grep -q '20\ pkt\ (dropped\ 40' || die "unexpected accept count at 4"
diff --git a/tools/testing/selftests/net/gre_gso.sh b/tools/testing/selftests/net/gre_gso.sh
new file mode 100755
index 000000000000..5100d90f92d2
--- /dev/null
+++ b/tools/testing/selftests/net/gre_gso.sh
@@ -0,0 +1,235 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is for checking GRE GSO.
+source lib.sh
+ret=0
+
+# all tests in this script. Can be overridden with -t option
+TESTS="gre_gso"
+
+VERBOSE=0
+PAUSE_ON_FAIL=no
+PAUSE=no
+TMPFILE=`mktemp`
+PID=
+
+log_test()
+{
+	local rc=$1
+	local expected=$2
+	local msg="$3"
+
+	if [ ${rc} -eq ${expected} ]; then
+		printf "    TEST: %-60s  [ OK ]\n" "${msg}"
+		nsuccess=$((nsuccess+1))
+	else
+		ret=1
+		nfail=$((nfail+1))
+		printf "    TEST: %-60s  [FAIL]\n" "${msg}"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+		echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	fi
+
+	if [ "${PAUSE}" = "yes" ]; then
+		echo
+		echo "hit enter to continue, 'q' to quit"
+		read a
+		[ "$a" = "q" ] && exit 1
+	fi
+}
+
+setup()
+{
+	set -e
+	setup_ns ns1
+	IP="ip -netns $ns1"
+	NS_EXEC="ip netns exec $ns1"
+
+	ip link add veth0 type veth peer name veth1
+	ip link set veth0 up
+	ip link set veth1 netns $ns1
+	$IP link set veth1 name veth0
+	$IP link set veth0 up
+
+	dd if=/dev/urandom of=$TMPFILE bs=1024 count=2048 &>/dev/null
+	set +e
+}
+
+cleanup()
+{
+	rm -rf $TMPFILE
+	[ -n "$PID" ] && kill $PID
+	ip link del dev gre1 &> /dev/null
+	ip link del dev veth0 &> /dev/null
+	cleanup_ns $ns1
+}
+
+get_linklocal()
+{
+	local dev=$1
+	local ns=$2
+	local addr
+
+	[ -n "$ns" ] && ns="-netns $ns"
+
+	addr=$(ip -6 -br $ns addr show dev ${dev} | \
+	awk '{
+		for (i = 3; i <= NF; ++i) {
+			if ($i ~ /^fe80/)
+				print $i
+		}
+	}'
+	)
+	addr=${addr/\/*}
+
+	[ -z "$addr" ] && return 1
+
+	echo $addr
+
+	return 0
+}
+
+gre_create_tun()
+{
+	local a1=$1
+	local a2=$2
+	local mode
+
+	[[ $a1 =~ ^[0-9.]*$ ]] && mode=gre || mode=ip6gre
+
+	ip tunnel add gre1 mode $mode local $a1 remote $a2 dev veth0
+	ip link set gre1 up
+	$IP tunnel add gre1 mode $mode local $a2 remote $a1 dev veth0
+	$IP link set gre1 up
+}
+
+gre_gst_test_checks()
+{
+	local name=$1
+	local addr=$2
+	local proto=$3
+
+	[ "$proto" == 6 ] && addr="[$addr]"
+
+	$NS_EXEC socat - tcp${proto}-listen:$port,reuseaddr,fork >/dev/null &
+	PID=$!
+	while ! $NS_EXEC ss -ltn | grep -q $port; do ((i++)); sleep 0.01; done
+
+	cat $TMPFILE | timeout 1 socat -u STDIN TCP:$addr:$port
+	log_test $? 0 "$name - copy file w/ TSO"
+
+	ethtool -K veth0 tso off
+
+	cat $TMPFILE | timeout 1 socat -u STDIN TCP:$addr:$port
+	log_test $? 0 "$name - copy file w/ GSO"
+
+	ethtool -K veth0 tso on
+
+	kill $PID
+	PID=
+}
+
+gre6_gso_test()
+{
+	local port=7777
+
+	setup
+
+	a1=$(get_linklocal veth0)
+	a2=$(get_linklocal veth0 $ns1)
+
+	gre_create_tun $a1 $a2
+
+	ip  addr add 172.16.2.1/24 dev gre1
+	$IP addr add 172.16.2.2/24 dev gre1
+
+	ip  -6 addr add 2001:db8:1::1/64 dev gre1 nodad
+	$IP -6 addr add 2001:db8:1::2/64 dev gre1 nodad
+
+	sleep 2
+
+	gre_gst_test_checks GREv6/v4 172.16.2.2 4
+	gre_gst_test_checks GREv6/v6 2001:db8:1::2 6
+
+	cleanup
+}
+
+gre_gso_test()
+{
+	gre6_gso_test
+}
+
+################################################################################
+# usage
+
+usage()
+{
+	cat <<EOF
+usage: ${0##*/} OPTS
+
+        -t <test>   Test(s) to run (default: all)
+                    (options: $TESTS)
+        -p          Pause on fail
+        -P          Pause after each test before cleanup
+        -v          verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
+# main
+
+while getopts :t:pPhv o
+do
+	case $o in
+		t) TESTS=$OPTARG;;
+		p) PAUSE_ON_FAIL=yes;;
+		P) PAUSE=yes;;
+		v) VERBOSE=$(($VERBOSE + 1));;
+		h) usage; exit 0;;
+		*) usage; exit 1;;
+	esac
+done
+
+PEER_CMD="ip netns exec ${PEER_NS}"
+
+# make sure we don't pause twice
+[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no
+
+if [ "$(id -u)" -ne 0 ];then
+	echo "SKIP: Need root privileges"
+	exit $ksft_skip;
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v socat)" ]; then
+	echo "SKIP: Could not run test without socat tool"
+	exit $ksft_skip
+fi
+
+# start clean
+cleanup &> /dev/null
+
+for t in $TESTS
+do
+	case $t in
+	gre_gso)		gre_gso_test;;
+
+	help) echo "Test names: $TESTS"; exit 0;;
+	esac
+done
+
+if [ "$TESTS" != "none" ]; then
+	printf "\nTests passed: %3d\n" ${nsuccess}
+	printf "Tests failed: %3d\n"   ${nfail}
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/gro.c
new file mode 100644
index 000000000000..353e1e867fbb
--- /dev/null
+++ b/tools/testing/selftests/net/gro.c
@@ -0,0 +1,1187 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This testsuite provides conformance testing for GRO coalescing.
+ *
+ * Test cases:
+ * 1.data
+ *  Data packets of the same size and same header setup with correct
+ *  sequence numbers coalesce. The one exception being the last data
+ *  packet coalesced: it can be smaller than the rest and coalesced
+ *  as long as it is in the same flow.
+ * 2.ack
+ *  Pure ACK does not coalesce.
+ * 3.flags
+ *  Specific test cases: no packets with PSH, SYN, URG, RST set will
+ *  be coalesced.
+ * 4.tcp
+ *  Packets with incorrect checksum, non-consecutive seqno and
+ *  different TCP header options shouldn't coalesce. Nit: given that
+ *  some extension headers have paddings, such as timestamp, headers
+ *  that are padding differently would not be coalesced.
+ * 5.ip:
+ *  Packets with different (ECN, TTL, TOS) header, ip options or
+ *  ip fragments (ipv6) shouldn't coalesce.
+ * 6.large:
+ *  Packets larger than GRO_MAX_SIZE packets shouldn't coalesce.
+ *
+ * MSS is defined as 4096 - header because if it is too small
+ * (i.e. 1500 MTU - header), it will result in many packets,
+ * increasing the "large" test case's flakiness. This is because
+ * due to time sensitivity in the coalescing window, the receiver
+ * may not coalesce all of the packets.
+ *
+ * Note the timing issue applies to all of the test cases, so some
+ * flakiness is to be expected.
+ *
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <getopt.h>
+#include <linux/filter.h>
+#include <linux/if_packet.h>
+#include <linux/ipv6.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "../kselftest.h"
+
+#define DPORT 8000
+#define SPORT 1500
+#define PAYLOAD_LEN 100
+#define NUM_PACKETS 4
+#define START_SEQ 100
+#define START_ACK 100
+#define ETH_P_NONE 0
+#define TOTAL_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
+#define MSS (4096 - sizeof(struct tcphdr) - sizeof(struct ipv6hdr))
+#define MAX_PAYLOAD (IP_MAXPACKET - sizeof(struct tcphdr) - sizeof(struct ipv6hdr))
+#define NUM_LARGE_PKT (MAX_PAYLOAD / MSS)
+#define MAX_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
+#define MIN_EXTHDR_SIZE 8
+#define EXT_PAYLOAD_1 "\x00\x00\x00\x00\x00\x00"
+#define EXT_PAYLOAD_2 "\x11\x11\x11\x11\x11\x11"
+
+#define ipv6_optlen(p)  (((p)->hdrlen+1) << 3) /* calculate IPv6 extension header len */
+#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
+
+static const char *addr6_src = "fdaa::2";
+static const char *addr6_dst = "fdaa::1";
+static const char *addr4_src = "192.168.1.200";
+static const char *addr4_dst = "192.168.1.100";
+static int proto = -1;
+static uint8_t src_mac[ETH_ALEN], dst_mac[ETH_ALEN];
+static char *testname = "data";
+static char *ifname = "eth0";
+static char *smac = "aa:00:00:00:00:02";
+static char *dmac = "aa:00:00:00:00:01";
+static bool verbose;
+static bool tx_socket = true;
+static int tcp_offset = -1;
+static int total_hdr_len = -1;
+static int ethhdr_proto = -1;
+
+static void vlog(const char *fmt, ...)
+{
+	va_list args;
+
+	if (verbose) {
+		va_start(args, fmt);
+		vfprintf(stderr, fmt, args);
+		va_end(args);
+	}
+}
+
+static void setup_sock_filter(int fd)
+{
+	const int dport_off = tcp_offset + offsetof(struct tcphdr, dest);
+	const int ethproto_off = offsetof(struct ethhdr, h_proto);
+	int optlen = 0;
+	int ipproto_off, opt_ipproto_off;
+	int next_off;
+
+	if (proto == PF_INET)
+		next_off = offsetof(struct iphdr, protocol);
+	else
+		next_off = offsetof(struct ipv6hdr, nexthdr);
+	ipproto_off = ETH_HLEN + next_off;
+
+	if (strcmp(testname, "ip") == 0) {
+		if (proto == PF_INET)
+			optlen = sizeof(struct ip_timestamp);
+		else {
+			BUILD_BUG_ON(sizeof(struct ip6_hbh) > MIN_EXTHDR_SIZE);
+			BUILD_BUG_ON(sizeof(struct ip6_dest) > MIN_EXTHDR_SIZE);
+			BUILD_BUG_ON(sizeof(struct ip6_frag) > MIN_EXTHDR_SIZE);
+
+			/* same size for HBH and Fragment extension header types */
+			optlen = MIN_EXTHDR_SIZE;
+			opt_ipproto_off = ETH_HLEN + sizeof(struct ipv6hdr)
+				+ offsetof(struct ip6_ext, ip6e_nxt);
+		}
+	}
+
+	/* this filter validates the following:
+	 *	- packet is IPv4/IPv6 according to the running test.
+	 *	- packet is TCP. Also handles the case of one extension header and then TCP.
+	 *	- checks the packet tcp dport equals to DPORT. Also handles the case of one
+	 *	  extension header and then TCP.
+	 */
+	struct sock_filter filter[] = {
+			BPF_STMT(BPF_LD  + BPF_H   + BPF_ABS, ethproto_off),
+			BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ntohs(ethhdr_proto), 0, 9),
+			BPF_STMT(BPF_LD  + BPF_B   + BPF_ABS, ipproto_off),
+			BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 2, 0),
+			BPF_STMT(BPF_LD  + BPF_B   + BPF_ABS, opt_ipproto_off),
+			BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 0, 5),
+			BPF_STMT(BPF_LD  + BPF_H   + BPF_ABS, dport_off),
+			BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 2, 0),
+			BPF_STMT(BPF_LD  + BPF_H   + BPF_ABS, dport_off + optlen),
+			BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 0, 1),
+			BPF_STMT(BPF_RET + BPF_K, 0xFFFFFFFF),
+			BPF_STMT(BPF_RET + BPF_K, 0),
+	};
+
+	struct sock_fprog bpf = {
+		.len = ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+
+	if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf, sizeof(bpf)) < 0)
+		error(1, errno, "error setting filter");
+}
+
+static uint32_t checksum_nofold(void *data, size_t len, uint32_t sum)
+{
+	uint16_t *words = data;
+	int i;
+
+	for (i = 0; i < len / 2; i++)
+		sum += words[i];
+	if (len & 1)
+		sum += ((char *)data)[len - 1];
+	return sum;
+}
+
+static uint16_t checksum_fold(void *data, size_t len, uint32_t sum)
+{
+	sum = checksum_nofold(data, len, sum);
+	while (sum > 0xFFFF)
+		sum = (sum & 0xFFFF) + (sum >> 16);
+	return ~sum;
+}
+
+static uint16_t tcp_checksum(void *buf, int payload_len)
+{
+	struct pseudo_header6 {
+		struct in6_addr saddr;
+		struct in6_addr daddr;
+		uint16_t protocol;
+		uint16_t payload_len;
+	} ph6;
+	struct pseudo_header4 {
+		struct in_addr saddr;
+		struct in_addr daddr;
+		uint16_t protocol;
+		uint16_t payload_len;
+	} ph4;
+	uint32_t sum = 0;
+
+	if (proto == PF_INET6) {
+		if (inet_pton(AF_INET6, addr6_src, &ph6.saddr) != 1)
+			error(1, errno, "inet_pton6 source ip pseudo");
+		if (inet_pton(AF_INET6, addr6_dst, &ph6.daddr) != 1)
+			error(1, errno, "inet_pton6 dest ip pseudo");
+		ph6.protocol = htons(IPPROTO_TCP);
+		ph6.payload_len = htons(sizeof(struct tcphdr) + payload_len);
+
+		sum = checksum_nofold(&ph6, sizeof(ph6), 0);
+	} else if (proto == PF_INET) {
+		if (inet_pton(AF_INET, addr4_src, &ph4.saddr) != 1)
+			error(1, errno, "inet_pton source ip pseudo");
+		if (inet_pton(AF_INET, addr4_dst, &ph4.daddr) != 1)
+			error(1, errno, "inet_pton dest ip pseudo");
+		ph4.protocol = htons(IPPROTO_TCP);
+		ph4.payload_len = htons(sizeof(struct tcphdr) + payload_len);
+
+		sum = checksum_nofold(&ph4, sizeof(ph4), 0);
+	}
+
+	return checksum_fold(buf, sizeof(struct tcphdr) + payload_len, sum);
+}
+
+static void read_MAC(uint8_t *mac_addr, char *mac)
+{
+	if (sscanf(mac, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
+		   &mac_addr[0], &mac_addr[1], &mac_addr[2],
+		   &mac_addr[3], &mac_addr[4], &mac_addr[5]) != 6)
+		error(1, 0, "sscanf");
+}
+
+static void fill_datalinklayer(void *buf)
+{
+	struct ethhdr *eth = buf;
+
+	memcpy(eth->h_dest, dst_mac, ETH_ALEN);
+	memcpy(eth->h_source, src_mac, ETH_ALEN);
+	eth->h_proto = ethhdr_proto;
+}
+
+static void fill_networklayer(void *buf, int payload_len)
+{
+	struct ipv6hdr *ip6h = buf;
+	struct iphdr *iph = buf;
+
+	if (proto == PF_INET6) {
+		memset(ip6h, 0, sizeof(*ip6h));
+
+		ip6h->version = 6;
+		ip6h->payload_len = htons(sizeof(struct tcphdr) + payload_len);
+		ip6h->nexthdr = IPPROTO_TCP;
+		ip6h->hop_limit = 8;
+		if (inet_pton(AF_INET6, addr6_src, &ip6h->saddr) != 1)
+			error(1, errno, "inet_pton source ip6");
+		if (inet_pton(AF_INET6, addr6_dst, &ip6h->daddr) != 1)
+			error(1, errno, "inet_pton dest ip6");
+	} else if (proto == PF_INET) {
+		memset(iph, 0, sizeof(*iph));
+
+		iph->version = 4;
+		iph->ihl = 5;
+		iph->ttl = 8;
+		iph->protocol	= IPPROTO_TCP;
+		iph->tot_len = htons(sizeof(struct tcphdr) +
+				payload_len + sizeof(struct iphdr));
+		iph->frag_off = htons(0x4000); /* DF = 1, MF = 0 */
+		if (inet_pton(AF_INET, addr4_src, &iph->saddr) != 1)
+			error(1, errno, "inet_pton source ip");
+		if (inet_pton(AF_INET, addr4_dst, &iph->daddr) != 1)
+			error(1, errno, "inet_pton dest ip");
+		iph->check = checksum_fold(buf, sizeof(struct iphdr), 0);
+	}
+}
+
+static void fill_transportlayer(void *buf, int seq_offset, int ack_offset,
+				int payload_len, int fin)
+{
+	struct tcphdr *tcph = buf;
+
+	memset(tcph, 0, sizeof(*tcph));
+
+	tcph->source = htons(SPORT);
+	tcph->dest = htons(DPORT);
+	tcph->seq = ntohl(START_SEQ + seq_offset);
+	tcph->ack_seq = ntohl(START_ACK + ack_offset);
+	tcph->ack = 1;
+	tcph->fin = fin;
+	tcph->doff = 5;
+	tcph->window = htons(TCP_MAXWIN);
+	tcph->urg_ptr = 0;
+	tcph->check = tcp_checksum(tcph, payload_len);
+}
+
+static void write_packet(int fd, char *buf, int len, struct sockaddr_ll *daddr)
+{
+	int ret = -1;
+
+	ret = sendto(fd, buf, len, 0, (struct sockaddr *)daddr, sizeof(*daddr));
+	if (ret == -1)
+		error(1, errno, "sendto failure");
+	if (ret != len)
+		error(1, errno, "sendto wrong length");
+}
+
+static void create_packet(void *buf, int seq_offset, int ack_offset,
+			  int payload_len, int fin)
+{
+	memset(buf, 0, total_hdr_len);
+	memset(buf + total_hdr_len, 'a', payload_len);
+	fill_transportlayer(buf + tcp_offset, seq_offset, ack_offset,
+			    payload_len, fin);
+	fill_networklayer(buf + ETH_HLEN, payload_len);
+	fill_datalinklayer(buf);
+}
+
+/* send one extra flag, not first and not last pkt */
+static void send_flags(int fd, struct sockaddr_ll *daddr, int psh, int syn,
+		       int rst, int urg)
+{
+	static char flag_buf[MAX_HDR_LEN + PAYLOAD_LEN];
+	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+	int payload_len, pkt_size, flag, i;
+	struct tcphdr *tcph;
+
+	payload_len = PAYLOAD_LEN * psh;
+	pkt_size = total_hdr_len + payload_len;
+	flag = NUM_PACKETS / 2;
+
+	create_packet(flag_buf, flag * payload_len, 0, payload_len, 0);
+
+	tcph = (struct tcphdr *)(flag_buf + tcp_offset);
+	tcph->psh = psh;
+	tcph->syn = syn;
+	tcph->rst = rst;
+	tcph->urg = urg;
+	tcph->check = 0;
+	tcph->check = tcp_checksum(tcph, payload_len);
+
+	for (i = 0; i < NUM_PACKETS + 1; i++) {
+		if (i == flag) {
+			write_packet(fd, flag_buf, pkt_size, daddr);
+			continue;
+		}
+		create_packet(buf, i * PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+		write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
+	}
+}
+
+/* Test for data of same length, smaller than previous
+ * and of different lengths
+ */
+static void send_data_pkts(int fd, struct sockaddr_ll *daddr,
+			   int payload_len1, int payload_len2)
+{
+	static char buf[ETH_HLEN + IP_MAXPACKET];
+
+	create_packet(buf, 0, 0, payload_len1, 0);
+	write_packet(fd, buf, total_hdr_len + payload_len1, daddr);
+	create_packet(buf, payload_len1, 0, payload_len2, 0);
+	write_packet(fd, buf, total_hdr_len + payload_len2, daddr);
+}
+
+/* If incoming segments make tracked segment length exceed
+ * legal IP datagram length, do not coalesce
+ */
+static void send_large(int fd, struct sockaddr_ll *daddr, int remainder)
+{
+	static char pkts[NUM_LARGE_PKT][TOTAL_HDR_LEN + MSS];
+	static char last[TOTAL_HDR_LEN + MSS];
+	static char new_seg[TOTAL_HDR_LEN + MSS];
+	int i;
+
+	for (i = 0; i < NUM_LARGE_PKT; i++)
+		create_packet(pkts[i], i * MSS, 0, MSS, 0);
+	create_packet(last, NUM_LARGE_PKT * MSS, 0, remainder, 0);
+	create_packet(new_seg, (NUM_LARGE_PKT + 1) * MSS, 0, remainder, 0);
+
+	for (i = 0; i < NUM_LARGE_PKT; i++)
+		write_packet(fd, pkts[i], total_hdr_len + MSS, daddr);
+	write_packet(fd, last, total_hdr_len + remainder, daddr);
+	write_packet(fd, new_seg, total_hdr_len + remainder, daddr);
+}
+
+/* Pure acks and dup acks don't coalesce */
+static void send_ack(int fd, struct sockaddr_ll *daddr)
+{
+	static char buf[MAX_HDR_LEN];
+
+	create_packet(buf, 0, 0, 0, 0);
+	write_packet(fd, buf, total_hdr_len, daddr);
+	write_packet(fd, buf, total_hdr_len, daddr);
+	create_packet(buf, 0, 1, 0, 0);
+	write_packet(fd, buf, total_hdr_len, daddr);
+}
+
+static void recompute_packet(char *buf, char *no_ext, int extlen)
+{
+	struct tcphdr *tcphdr = (struct tcphdr *)(buf + tcp_offset);
+	struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
+	struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+
+	memmove(buf, no_ext, total_hdr_len);
+	memmove(buf + total_hdr_len + extlen,
+		no_ext + total_hdr_len, PAYLOAD_LEN);
+
+	tcphdr->doff = tcphdr->doff + (extlen / 4);
+	tcphdr->check = 0;
+	tcphdr->check = tcp_checksum(tcphdr, PAYLOAD_LEN + extlen);
+	if (proto == PF_INET) {
+		iph->tot_len = htons(ntohs(iph->tot_len) + extlen);
+		iph->check = 0;
+		iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+	} else {
+		ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen);
+	}
+}
+
+static void tcp_write_options(char *buf, int kind, int ts)
+{
+	struct tcp_option_ts {
+		uint8_t kind;
+		uint8_t len;
+		uint32_t tsval;
+		uint32_t tsecr;
+	} *opt_ts = (void *)buf;
+	struct tcp_option_window {
+		uint8_t kind;
+		uint8_t len;
+		uint8_t shift;
+	} *opt_window = (void *)buf;
+
+	switch (kind) {
+	case TCPOPT_NOP:
+		buf[0] = TCPOPT_NOP;
+		break;
+	case TCPOPT_WINDOW:
+		memset(opt_window, 0, sizeof(struct tcp_option_window));
+		opt_window->kind = TCPOPT_WINDOW;
+		opt_window->len = TCPOLEN_WINDOW;
+		opt_window->shift = 0;
+		break;
+	case TCPOPT_TIMESTAMP:
+		memset(opt_ts, 0, sizeof(struct tcp_option_ts));
+		opt_ts->kind = TCPOPT_TIMESTAMP;
+		opt_ts->len = TCPOLEN_TIMESTAMP;
+		opt_ts->tsval = ts;
+		opt_ts->tsecr = 0;
+		break;
+	default:
+		error(1, 0, "unimplemented TCP option");
+		break;
+	}
+}
+
+/* TCP with options is always a permutation of {TS, NOP, NOP}.
+ * Implement different orders to verify coalescing stops.
+ */
+static void add_standard_tcp_options(char *buf, char *no_ext, int ts, int order)
+{
+	switch (order) {
+	case 0:
+		tcp_write_options(buf + total_hdr_len, TCPOPT_NOP, 0);
+		tcp_write_options(buf + total_hdr_len + 1, TCPOPT_NOP, 0);
+		tcp_write_options(buf + total_hdr_len + 2 /* two NOP opts */,
+				  TCPOPT_TIMESTAMP, ts);
+		break;
+	case 1:
+		tcp_write_options(buf + total_hdr_len, TCPOPT_NOP, 0);
+		tcp_write_options(buf + total_hdr_len + 1,
+				  TCPOPT_TIMESTAMP, ts);
+		tcp_write_options(buf + total_hdr_len + 1 + TCPOLEN_TIMESTAMP,
+				  TCPOPT_NOP, 0);
+		break;
+	case 2:
+		tcp_write_options(buf + total_hdr_len, TCPOPT_TIMESTAMP, ts);
+		tcp_write_options(buf + total_hdr_len + TCPOLEN_TIMESTAMP + 1,
+				  TCPOPT_NOP, 0);
+		tcp_write_options(buf + total_hdr_len + TCPOLEN_TIMESTAMP + 2,
+				  TCPOPT_NOP, 0);
+		break;
+	default:
+		error(1, 0, "unknown order");
+		break;
+	}
+	recompute_packet(buf, no_ext, TCPOLEN_TSTAMP_APPA);
+}
+
+/* Packets with invalid checksum don't coalesce. */
+static void send_changed_checksum(int fd, struct sockaddr_ll *daddr)
+{
+	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+	struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset);
+	int pkt_size = total_hdr_len + PAYLOAD_LEN;
+
+	create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+	write_packet(fd, buf, pkt_size, daddr);
+
+	create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+	tcph->check = tcph->check - 1;
+	write_packet(fd, buf, pkt_size, daddr);
+}
+
+ /* Packets with non-consecutive sequence number don't coalesce.*/
+static void send_changed_seq(int fd, struct sockaddr_ll *daddr)
+{
+	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+	struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset);
+	int pkt_size = total_hdr_len + PAYLOAD_LEN;
+
+	create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+	write_packet(fd, buf, pkt_size, daddr);
+
+	create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+	tcph->seq = ntohl(htonl(tcph->seq) + 1);
+	tcph->check = 0;
+	tcph->check = tcp_checksum(tcph, PAYLOAD_LEN);
+	write_packet(fd, buf, pkt_size, daddr);
+}
+
+ /* Packet with different timestamp option or different timestamps
+  * don't coalesce.
+  */
+static void send_changed_ts(int fd, struct sockaddr_ll *daddr)
+{
+	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+	static char extpkt[sizeof(buf) + TCPOLEN_TSTAMP_APPA];
+	int pkt_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA;
+
+	create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+	add_standard_tcp_options(extpkt, buf, 0, 0);
+	write_packet(fd, extpkt, pkt_size, daddr);
+
+	create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+	add_standard_tcp_options(extpkt, buf, 0, 0);
+	write_packet(fd, extpkt, pkt_size, daddr);
+
+	create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+	add_standard_tcp_options(extpkt, buf, 100, 0);
+	write_packet(fd, extpkt, pkt_size, daddr);
+
+	create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0);
+	add_standard_tcp_options(extpkt, buf, 100, 1);
+	write_packet(fd, extpkt, pkt_size, daddr);
+
+	create_packet(buf, PAYLOAD_LEN * 4, 0, PAYLOAD_LEN, 0);
+	add_standard_tcp_options(extpkt, buf, 100, 2);
+	write_packet(fd, extpkt, pkt_size, daddr);
+}
+
+/* Packet with different tcp options don't coalesce. */
+static void send_diff_opt(int fd, struct sockaddr_ll *daddr)
+{
+	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+	static char extpkt1[sizeof(buf) + TCPOLEN_TSTAMP_APPA];
+	static char extpkt2[sizeof(buf) + TCPOLEN_MAXSEG];
+	int extpkt1_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA;
+	int extpkt2_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_MAXSEG;
+
+	create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+	add_standard_tcp_options(extpkt1, buf, 0, 0);
+	write_packet(fd, extpkt1, extpkt1_size, daddr);
+
+	create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+	add_standard_tcp_options(extpkt1, buf, 0, 0);
+	write_packet(fd, extpkt1, extpkt1_size, daddr);
+
+	create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+	tcp_write_options(extpkt2 + MAX_HDR_LEN, TCPOPT_NOP, 0);
+	tcp_write_options(extpkt2 + MAX_HDR_LEN + 1, TCPOPT_WINDOW, 0);
+	recompute_packet(extpkt2, buf, TCPOLEN_WINDOW + 1);
+	write_packet(fd, extpkt2, extpkt2_size, daddr);
+}
+
+static void add_ipv4_ts_option(void *buf, void *optpkt)
+{
+	struct ip_timestamp *ts = (struct ip_timestamp *)(optpkt + tcp_offset);
+	int optlen = sizeof(struct ip_timestamp);
+	struct iphdr *iph;
+
+	if (optlen % 4)
+		error(1, 0, "ipv4 timestamp length is not a multiple of 4B");
+
+	ts->ipt_code = IPOPT_TS;
+	ts->ipt_len = optlen;
+	ts->ipt_ptr = 5;
+	ts->ipt_flg = IPOPT_TS_TSONLY;
+
+	memcpy(optpkt, buf, tcp_offset);
+	memcpy(optpkt + tcp_offset + optlen, buf + tcp_offset,
+	       sizeof(struct tcphdr) + PAYLOAD_LEN);
+
+	iph = (struct iphdr *)(optpkt + ETH_HLEN);
+	iph->ihl = 5 + (optlen / 4);
+	iph->tot_len = htons(ntohs(iph->tot_len) + optlen);
+	iph->check = 0;
+	iph->check = checksum_fold(iph, sizeof(struct iphdr) + optlen, 0);
+}
+
+static void add_ipv6_exthdr(void *buf, void *optpkt, __u8 exthdr_type, char *ext_payload)
+{
+	struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr *)(optpkt + tcp_offset);
+	struct ipv6hdr *iph = (struct ipv6hdr *)(optpkt + ETH_HLEN);
+	char *exthdr_payload_start = (char *)(exthdr + 1);
+
+	exthdr->hdrlen = 0;
+	exthdr->nexthdr = IPPROTO_TCP;
+
+	memcpy(exthdr_payload_start, ext_payload, MIN_EXTHDR_SIZE - sizeof(*exthdr));
+
+	memcpy(optpkt, buf, tcp_offset);
+	memcpy(optpkt + tcp_offset + MIN_EXTHDR_SIZE, buf + tcp_offset,
+		sizeof(struct tcphdr) + PAYLOAD_LEN);
+
+	iph->nexthdr = exthdr_type;
+	iph->payload_len = htons(ntohs(iph->payload_len) + MIN_EXTHDR_SIZE);
+}
+
+static void send_ipv6_exthdr(int fd, struct sockaddr_ll *daddr, char *ext_data1, char *ext_data2)
+{
+	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+	static char exthdr_pck[sizeof(buf) + MIN_EXTHDR_SIZE];
+
+	create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+	add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_HOPOPTS, ext_data1);
+	write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr);
+
+	create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0);
+	add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_HOPOPTS, ext_data2);
+	write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr);
+}
+
+/* IPv4 options shouldn't coalesce */
+static void send_ip_options(int fd, struct sockaddr_ll *daddr)
+{
+	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+	static char optpkt[sizeof(buf) + sizeof(struct ip_timestamp)];
+	int optlen = sizeof(struct ip_timestamp);
+	int pkt_size = total_hdr_len + PAYLOAD_LEN + optlen;
+
+	create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+	write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
+
+	create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0);
+	add_ipv4_ts_option(buf, optpkt);
+	write_packet(fd, optpkt, pkt_size, daddr);
+
+	create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+	write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
+}
+
+/*  IPv4 fragments shouldn't coalesce */
+static void send_fragment4(int fd, struct sockaddr_ll *daddr)
+{
+	static char buf[IP_MAXPACKET];
+	struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+	int pkt_size = total_hdr_len + PAYLOAD_LEN;
+
+	create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+	write_packet(fd, buf, pkt_size, daddr);
+
+	/* Once fragmented, packet would retain the total_len.
+	 * Tcp header is prepared as if rest of data is in follow-up frags,
+	 * but follow up frags aren't actually sent.
+	 */
+	memset(buf + total_hdr_len, 'a', PAYLOAD_LEN * 2);
+	fill_transportlayer(buf + tcp_offset, PAYLOAD_LEN, 0, PAYLOAD_LEN * 2, 0);
+	fill_networklayer(buf + ETH_HLEN, PAYLOAD_LEN);
+	fill_datalinklayer(buf);
+
+	iph->frag_off = htons(0x6000); // DF = 1, MF = 1
+	iph->check = 0;
+	iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+	write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* IPv4 packets with different ttl don't coalesce.*/
+static void send_changed_ttl(int fd, struct sockaddr_ll *daddr)
+{
+	int pkt_size = total_hdr_len + PAYLOAD_LEN;
+	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+	struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+
+	create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+	write_packet(fd, buf, pkt_size, daddr);
+
+	create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+	iph->ttl = 7;
+	iph->check = 0;
+	iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+	write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* Packets with different tos don't coalesce.*/
+static void send_changed_tos(int fd, struct sockaddr_ll *daddr)
+{
+	int pkt_size = total_hdr_len + PAYLOAD_LEN;
+	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+	struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+	struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
+
+	create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+	write_packet(fd, buf, pkt_size, daddr);
+
+	create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+	if (proto == PF_INET) {
+		iph->tos = 1;
+		iph->check = 0;
+		iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+	} else if (proto == PF_INET6) {
+		ip6h->priority = 0xf;
+	}
+	write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* Packets with different ECN don't coalesce.*/
+static void send_changed_ECN(int fd, struct sockaddr_ll *daddr)
+{
+	int pkt_size = total_hdr_len + PAYLOAD_LEN;
+	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+	struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+
+	create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+	write_packet(fd, buf, pkt_size, daddr);
+
+	create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+	if (proto == PF_INET) {
+		buf[ETH_HLEN + 1] ^= 0x2; // ECN set to 10
+		iph->check = 0;
+		iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+	} else {
+		buf[ETH_HLEN + 1] ^= 0x20; // ECN set to 10
+	}
+	write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* IPv6 fragments and packets with extensions don't coalesce.*/
+static void send_fragment6(int fd, struct sockaddr_ll *daddr)
+{
+	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+	static char extpkt[MAX_HDR_LEN + PAYLOAD_LEN +
+			   sizeof(struct ip6_frag)];
+	struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
+	struct ip6_frag *frag = (void *)(extpkt + tcp_offset);
+	int extlen = sizeof(struct ip6_frag);
+	int bufpkt_len = total_hdr_len + PAYLOAD_LEN;
+	int extpkt_len = bufpkt_len + extlen;
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		create_packet(buf, PAYLOAD_LEN * i, 0, PAYLOAD_LEN, 0);
+		write_packet(fd, buf, bufpkt_len, daddr);
+	}
+	sleep(1);
+	create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+	memset(extpkt, 0, extpkt_len);
+
+	ip6h->nexthdr = IPPROTO_FRAGMENT;
+	ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen);
+	frag->ip6f_nxt = IPPROTO_TCP;
+
+	memcpy(extpkt, buf, tcp_offset);
+	memcpy(extpkt + tcp_offset + extlen, buf + tcp_offset,
+	       sizeof(struct tcphdr) + PAYLOAD_LEN);
+	write_packet(fd, extpkt, extpkt_len, daddr);
+
+	create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0);
+	write_packet(fd, buf, bufpkt_len, daddr);
+}
+
+static void bind_packetsocket(int fd)
+{
+	struct sockaddr_ll daddr = {};
+
+	daddr.sll_family = AF_PACKET;
+	daddr.sll_protocol = ethhdr_proto;
+	daddr.sll_ifindex = if_nametoindex(ifname);
+	if (daddr.sll_ifindex == 0)
+		error(1, errno, "if_nametoindex");
+
+	if (bind(fd, (void *)&daddr, sizeof(daddr)) < 0)
+		error(1, errno, "could not bind socket");
+}
+
+static void set_timeout(int fd)
+{
+	struct timeval timeout;
+
+	timeout.tv_sec = 3;
+	timeout.tv_usec = 0;
+	if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (char *)&timeout,
+		       sizeof(timeout)) < 0)
+		error(1, errno, "cannot set timeout, setsockopt failed");
+}
+
+static void check_recv_pkts(int fd, int *correct_payload,
+			    int correct_num_pkts)
+{
+	static char buffer[IP_MAXPACKET + ETH_HLEN + 1];
+	struct iphdr *iph = (struct iphdr *)(buffer + ETH_HLEN);
+	struct ipv6hdr *ip6h = (struct ipv6hdr *)(buffer + ETH_HLEN);
+	struct tcphdr *tcph;
+	bool bad_packet = false;
+	int tcp_ext_len = 0;
+	int ip_ext_len = 0;
+	int pkt_size = -1;
+	int data_len = 0;
+	int num_pkt = 0;
+	int i;
+
+	vlog("Expected {");
+	for (i = 0; i < correct_num_pkts; i++)
+		vlog("%d ", correct_payload[i]);
+	vlog("}, Total %d packets\nReceived {", correct_num_pkts);
+
+	while (1) {
+		ip_ext_len = 0;
+		pkt_size = recv(fd, buffer, IP_MAXPACKET + ETH_HLEN + 1, 0);
+		if (pkt_size < 0)
+			error(1, errno, "could not receive");
+
+		if (iph->version == 4)
+			ip_ext_len = (iph->ihl - 5) * 4;
+		else if (ip6h->version == 6 && ip6h->nexthdr != IPPROTO_TCP)
+			ip_ext_len = MIN_EXTHDR_SIZE;
+
+		tcph = (struct tcphdr *)(buffer + tcp_offset + ip_ext_len);
+
+		if (tcph->fin)
+			break;
+
+		tcp_ext_len = (tcph->doff - 5) * 4;
+		data_len = pkt_size - total_hdr_len - tcp_ext_len - ip_ext_len;
+		/* Min ethernet frame payload is 46(ETH_ZLEN - ETH_HLEN) by RFC 802.3.
+		 * Ipv4/tcp packets without at least 6 bytes of data will be padded.
+		 * Packet sockets are protocol agnostic, and will not trim the padding.
+		 */
+		if (pkt_size == ETH_ZLEN && iph->version == 4) {
+			data_len = ntohs(iph->tot_len)
+				- sizeof(struct tcphdr) - sizeof(struct iphdr);
+		}
+		vlog("%d ", data_len);
+		if (data_len != correct_payload[num_pkt]) {
+			vlog("[!=%d]", correct_payload[num_pkt]);
+			bad_packet = true;
+		}
+		num_pkt++;
+	}
+	vlog("}, Total %d packets.\n", num_pkt);
+	if (num_pkt != correct_num_pkts)
+		error(1, 0, "incorrect number of packets");
+	if (bad_packet)
+		error(1, 0, "incorrect packet geometry");
+
+	printf("Test succeeded\n\n");
+}
+
+static void gro_sender(void)
+{
+	static char fin_pkt[MAX_HDR_LEN];
+	struct sockaddr_ll daddr = {};
+	int txfd = -1;
+
+	txfd = socket(PF_PACKET, SOCK_RAW, IPPROTO_RAW);
+	if (txfd < 0)
+		error(1, errno, "socket creation");
+
+	memset(&daddr, 0, sizeof(daddr));
+	daddr.sll_ifindex = if_nametoindex(ifname);
+	if (daddr.sll_ifindex == 0)
+		error(1, errno, "if_nametoindex");
+	daddr.sll_family = AF_PACKET;
+	memcpy(daddr.sll_addr, dst_mac, ETH_ALEN);
+	daddr.sll_halen = ETH_ALEN;
+	create_packet(fin_pkt, PAYLOAD_LEN * 2, 0, 0, 1);
+
+	if (strcmp(testname, "data") == 0) {
+		send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+		send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN / 2);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+		send_data_pkts(txfd, &daddr, PAYLOAD_LEN / 2, PAYLOAD_LEN);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+	} else if (strcmp(testname, "ack") == 0) {
+		send_ack(txfd, &daddr);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+	} else if (strcmp(testname, "flags") == 0) {
+		send_flags(txfd, &daddr, 1, 0, 0, 0);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+		send_flags(txfd, &daddr, 0, 1, 0, 0);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+		send_flags(txfd, &daddr, 0, 0, 1, 0);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+		send_flags(txfd, &daddr, 0, 0, 0, 1);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+	} else if (strcmp(testname, "tcp") == 0) {
+		send_changed_checksum(txfd, &daddr);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+		send_changed_seq(txfd, &daddr);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+		send_changed_ts(txfd, &daddr);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+		send_diff_opt(txfd, &daddr);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+	} else if (strcmp(testname, "ip") == 0) {
+		send_changed_ECN(txfd, &daddr);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+		send_changed_tos(txfd, &daddr);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+		if (proto == PF_INET) {
+			/* Modified packets may be received out of order.
+			 * Sleep function added to enforce test boundaries
+			 * so that fin pkts are not received prior to other pkts.
+			 */
+			sleep(1);
+			send_changed_ttl(txfd, &daddr);
+			write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+			sleep(1);
+			send_ip_options(txfd, &daddr);
+			sleep(1);
+			write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+			sleep(1);
+			send_fragment4(txfd, &daddr);
+			sleep(1);
+			write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+		} else if (proto == PF_INET6) {
+			sleep(1);
+			send_fragment6(txfd, &daddr);
+			sleep(1);
+			write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+			sleep(1);
+			/* send IPv6 packets with ext header with same payload */
+			send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_1);
+			sleep(1);
+			write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+			sleep(1);
+			/* send IPv6 packets with ext header with different payload */
+			send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_2);
+			sleep(1);
+			write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+		}
+	} else if (strcmp(testname, "large") == 0) {
+		/* 20 is the difference between min iphdr size
+		 * and min ipv6hdr size. Like MAX_HDR_SIZE,
+		 * MAX_PAYLOAD is defined with the larger header of the two.
+		 */
+		int offset = proto == PF_INET ? 20 : 0;
+		int remainder = (MAX_PAYLOAD + offset) % MSS;
+
+		send_large(txfd, &daddr, remainder);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+		send_large(txfd, &daddr, remainder + 1);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+	} else {
+		error(1, 0, "Unknown testcase");
+	}
+
+	if (close(txfd))
+		error(1, errno, "socket close");
+}
+
+static void gro_receiver(void)
+{
+	static int correct_payload[NUM_PACKETS];
+	int rxfd = -1;
+
+	rxfd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_NONE));
+	if (rxfd < 0)
+		error(1, 0, "socket creation");
+	setup_sock_filter(rxfd);
+	set_timeout(rxfd);
+	bind_packetsocket(rxfd);
+
+	memset(correct_payload, 0, sizeof(correct_payload));
+
+	if (strcmp(testname, "data") == 0) {
+		printf("pure data packet of same size: ");
+		correct_payload[0] = PAYLOAD_LEN * 2;
+		check_recv_pkts(rxfd, correct_payload, 1);
+
+		printf("large data packets followed by a smaller one: ");
+		correct_payload[0] = PAYLOAD_LEN * 1.5;
+		check_recv_pkts(rxfd, correct_payload, 1);
+
+		printf("small data packets followed by a larger one: ");
+		correct_payload[0] = PAYLOAD_LEN / 2;
+		correct_payload[1] = PAYLOAD_LEN;
+		check_recv_pkts(rxfd, correct_payload, 2);
+	} else if (strcmp(testname, "ack") == 0) {
+		printf("duplicate ack and pure ack: ");
+		check_recv_pkts(rxfd, correct_payload, 3);
+	} else if (strcmp(testname, "flags") == 0) {
+		correct_payload[0] = PAYLOAD_LEN * 3;
+		correct_payload[1] = PAYLOAD_LEN * 2;
+
+		printf("psh flag ends coalescing: ");
+		check_recv_pkts(rxfd, correct_payload, 2);
+
+		correct_payload[0] = PAYLOAD_LEN * 2;
+		correct_payload[1] = 0;
+		correct_payload[2] = PAYLOAD_LEN * 2;
+		printf("syn flag ends coalescing: ");
+		check_recv_pkts(rxfd, correct_payload, 3);
+
+		printf("rst flag ends coalescing: ");
+		check_recv_pkts(rxfd, correct_payload, 3);
+
+		printf("urg flag ends coalescing: ");
+		check_recv_pkts(rxfd, correct_payload, 3);
+	} else if (strcmp(testname, "tcp") == 0) {
+		correct_payload[0] = PAYLOAD_LEN;
+		correct_payload[1] = PAYLOAD_LEN;
+		correct_payload[2] = PAYLOAD_LEN;
+		correct_payload[3] = PAYLOAD_LEN;
+
+		printf("changed checksum does not coalesce: ");
+		check_recv_pkts(rxfd, correct_payload, 2);
+
+		printf("Wrong Seq number doesn't coalesce: ");
+		check_recv_pkts(rxfd, correct_payload, 2);
+
+		printf("Different timestamp doesn't coalesce: ");
+		correct_payload[0] = PAYLOAD_LEN * 2;
+		check_recv_pkts(rxfd, correct_payload, 4);
+
+		printf("Different options doesn't coalesce: ");
+		correct_payload[0] = PAYLOAD_LEN * 2;
+		check_recv_pkts(rxfd, correct_payload, 2);
+	} else if (strcmp(testname, "ip") == 0) {
+		correct_payload[0] = PAYLOAD_LEN;
+		correct_payload[1] = PAYLOAD_LEN;
+
+		printf("different ECN doesn't coalesce: ");
+		check_recv_pkts(rxfd, correct_payload, 2);
+
+		printf("different tos doesn't coalesce: ");
+		check_recv_pkts(rxfd, correct_payload, 2);
+
+		if (proto == PF_INET) {
+			printf("different ttl doesn't coalesce: ");
+			check_recv_pkts(rxfd, correct_payload, 2);
+
+			printf("ip options doesn't coalesce: ");
+			correct_payload[2] = PAYLOAD_LEN;
+			check_recv_pkts(rxfd, correct_payload, 3);
+
+			printf("fragmented ip4 doesn't coalesce: ");
+			check_recv_pkts(rxfd, correct_payload, 2);
+		} else if (proto == PF_INET6) {
+			/* GRO doesn't check for ipv6 hop limit when flushing.
+			 * Hence no corresponding test to the ipv4 case.
+			 */
+			printf("fragmented ip6 doesn't coalesce: ");
+			correct_payload[0] = PAYLOAD_LEN * 2;
+			correct_payload[1] = PAYLOAD_LEN;
+			correct_payload[2] = PAYLOAD_LEN;
+			check_recv_pkts(rxfd, correct_payload, 3);
+
+			printf("ipv6 with ext header does coalesce: ");
+			correct_payload[0] = PAYLOAD_LEN * 2;
+			check_recv_pkts(rxfd, correct_payload, 1);
+
+			printf("ipv6 with ext header with different payloads doesn't coalesce: ");
+			correct_payload[0] = PAYLOAD_LEN;
+			correct_payload[1] = PAYLOAD_LEN;
+			check_recv_pkts(rxfd, correct_payload, 2);
+		}
+	} else if (strcmp(testname, "large") == 0) {
+		int offset = proto == PF_INET ? 20 : 0;
+		int remainder = (MAX_PAYLOAD + offset) % MSS;
+
+		correct_payload[0] = (MAX_PAYLOAD + offset);
+		correct_payload[1] = remainder;
+		printf("Shouldn't coalesce if exceed IP max pkt size: ");
+		check_recv_pkts(rxfd, correct_payload, 2);
+
+		/* last segment sent individually, doesn't start new segment */
+		correct_payload[0] = correct_payload[0] - remainder;
+		correct_payload[1] = remainder + 1;
+		correct_payload[2] = remainder + 1;
+		check_recv_pkts(rxfd, correct_payload, 3);
+	} else {
+		error(1, 0, "Test case error, should never trigger");
+	}
+
+	if (close(rxfd))
+		error(1, 0, "socket close");
+}
+
+static void parse_args(int argc, char **argv)
+{
+	static const struct option opts[] = {
+		{ "daddr", required_argument, NULL, 'd' },
+		{ "dmac", required_argument, NULL, 'D' },
+		{ "iface", required_argument, NULL, 'i' },
+		{ "ipv4", no_argument, NULL, '4' },
+		{ "ipv6", no_argument, NULL, '6' },
+		{ "rx", no_argument, NULL, 'r' },
+		{ "saddr", required_argument, NULL, 's' },
+		{ "smac", required_argument, NULL, 'S' },
+		{ "test", required_argument, NULL, 't' },
+		{ "verbose", no_argument, NULL, 'v' },
+		{ 0, 0, 0, 0 }
+	};
+	int c;
+
+	while ((c = getopt_long(argc, argv, "46d:D:i:rs:S:t:v", opts, NULL)) != -1) {
+		switch (c) {
+		case '4':
+			proto = PF_INET;
+			ethhdr_proto = htons(ETH_P_IP);
+			break;
+		case '6':
+			proto = PF_INET6;
+			ethhdr_proto = htons(ETH_P_IPV6);
+			break;
+		case 'd':
+			addr4_dst = addr6_dst = optarg;
+			break;
+		case 'D':
+			dmac = optarg;
+			break;
+		case 'i':
+			ifname = optarg;
+			break;
+		case 'r':
+			tx_socket = false;
+			break;
+		case 's':
+			addr4_src = addr6_src = optarg;
+			break;
+		case 'S':
+			smac = optarg;
+			break;
+		case 't':
+			testname = optarg;
+			break;
+		case 'v':
+			verbose = true;
+			break;
+		default:
+			error(1, 0, "%s invalid option %c\n", __func__, c);
+			break;
+		}
+	}
+}
+
+int main(int argc, char **argv)
+{
+	parse_args(argc, argv);
+
+	if (proto == PF_INET) {
+		tcp_offset = ETH_HLEN + sizeof(struct iphdr);
+		total_hdr_len = tcp_offset + sizeof(struct tcphdr);
+	} else if (proto == PF_INET6) {
+		tcp_offset = ETH_HLEN + sizeof(struct ipv6hdr);
+		total_hdr_len = MAX_HDR_LEN;
+	} else {
+		error(1, 0, "Protocol family is not ipv4 or ipv6");
+	}
+
+	read_MAC(src_mac, smac);
+	read_MAC(dst_mac, dmac);
+
+	if (tx_socket)
+		gro_sender();
+	else
+		gro_receiver();
+
+	fprintf(stderr, "Gro::%s test passed.\n", testname);
+	return 0;
+}
diff --git a/tools/testing/selftests/net/gro.sh b/tools/testing/selftests/net/gro.sh
new file mode 100755
index 000000000000..02c21ff4ca81
--- /dev/null
+++ b/tools/testing/selftests/net/gro.sh
@@ -0,0 +1,104 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+readonly SERVER_MAC="aa:00:00:00:00:02"
+readonly CLIENT_MAC="aa:00:00:00:00:01"
+readonly TESTS=("data" "ack" "flags" "tcp" "ip" "large")
+readonly PROTOS=("ipv4" "ipv6")
+dev=""
+test="all"
+proto="ipv4"
+
+run_test() {
+  local server_pid=0
+  local exit_code=0
+  local protocol=$1
+  local test=$2
+  local ARGS=( "--${protocol}" "--dmac" "${SERVER_MAC}" \
+  "--smac" "${CLIENT_MAC}" "--test" "${test}" "--verbose" )
+
+  setup_ns
+  # Each test is run 3 times to deflake, because given the receive timing,
+  # not all packets that should coalesce will be considered in the same flow
+  # on every try.
+  for tries in {1..3}; do
+    # Actual test starts here
+    ip netns exec $server_ns ./gro "${ARGS[@]}" "--rx" "--iface" "server" \
+      1>>log.txt &
+    server_pid=$!
+    sleep 0.5  # to allow for socket init
+    ip netns exec $client_ns ./gro "${ARGS[@]}" "--iface" "client" \
+      1>>log.txt
+    wait "${server_pid}"
+    exit_code=$?
+    if [[ ${test} == "large" && -n "${KSFT_MACHINE_SLOW}" && \
+          ${exit_code} -ne 0 ]]; then
+        echo "Ignoring errors due to slow environment" 1>&2
+        exit_code=0
+    fi
+    if [[ "${exit_code}" -eq 0 ]]; then
+        break;
+    fi
+  done
+  cleanup_ns
+  echo ${exit_code}
+}
+
+run_all_tests() {
+  local failed_tests=()
+  for proto in "${PROTOS[@]}"; do
+    for test in "${TESTS[@]}"; do
+      echo "running test ${proto} ${test}" >&2
+      exit_code=$(run_test $proto $test)
+      if [[ "${exit_code}" -ne 0 ]]; then
+        failed_tests+=("${proto}_${test}")
+      fi;
+    done;
+  done
+  if [[ ${#failed_tests[@]} -ne 0 ]]; then
+    echo "failed tests: ${failed_tests[*]}. \
+    Please see log.txt for more logs"
+    exit 1
+  else
+    echo "All Tests Succeeded!"
+  fi;
+}
+
+usage() {
+  echo "Usage: $0 \
+  [-i <DEV>] \
+  [-t data|ack|flags|tcp|ip|large] \
+  [-p <ipv4|ipv6>]" 1>&2;
+  exit 1;
+}
+
+while getopts "i:t:p:" opt; do
+  case "${opt}" in
+    i)
+      dev="${OPTARG}"
+      ;;
+    t)
+      test="${OPTARG}"
+      ;;
+    p)
+      proto="${OPTARG}"
+      ;;
+    *)
+      usage
+      ;;
+  esac
+done
+
+if [ -n "$dev" ]; then
+	source setup_loopback.sh
+else
+	source setup_veth.sh
+fi
+
+setup
+trap cleanup EXIT
+if [[ "${test}" == "all" ]]; then
+  run_all_tests
+else
+  run_test "${proto}" "${test}"
+fi;
diff --git a/tools/testing/selftests/net/hsr/Makefile b/tools/testing/selftests/net/hsr/Makefile
new file mode 100644
index 000000000000..92c1d9d080cd
--- /dev/null
+++ b/tools/testing/selftests/net/hsr/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+
+top_srcdir = ../../../../..
+
+TEST_PROGS := hsr_ping.sh
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/net/hsr/config b/tools/testing/selftests/net/hsr/config
new file mode 100644
index 000000000000..22061204fb69
--- /dev/null
+++ b/tools/testing/selftests/net/hsr/config
@@ -0,0 +1,4 @@
+CONFIG_IPV6=y
+CONFIG_NET_SCH_NETEM=m
+CONFIG_HSR=y
+CONFIG_VETH=y
diff --git a/tools/testing/selftests/net/hsr/hsr_ping.sh b/tools/testing/selftests/net/hsr/hsr_ping.sh
new file mode 100755
index 000000000000..1c6457e54625
--- /dev/null
+++ b/tools/testing/selftests/net/hsr/hsr_ping.sh
@@ -0,0 +1,276 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ret=0
+ksft_skip=4
+ipv6=true
+
+optstring="h4"
+usage() {
+	echo "Usage: $0 [OPTION]"
+	echo -e "\t-4: IPv4 only: disable IPv6 tests (default: test both IPv4 and IPv6)"
+}
+
+while getopts "$optstring" option;do
+	case "$option" in
+	"h")
+		usage $0
+		exit 0
+		;;
+	"4")
+		ipv6=false
+		;;
+	"?")
+		usage $0
+		exit 1
+		;;
+esac
+done
+
+sec=$(date +%s)
+rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
+ns1="ns1-$rndh"
+ns2="ns2-$rndh"
+ns3="ns3-$rndh"
+
+cleanup()
+{
+	local netns
+	for netns in "$ns1" "$ns2" "$ns3" ;do
+		ip netns del $netns
+	done
+}
+
+# $1: IP address
+is_v6()
+{
+	[ -z "${1##*:*}" ]
+}
+
+do_ping()
+{
+	local netns="$1"
+	local connect_addr="$2"
+	local ping_args="-q -c 2"
+
+	if is_v6 "${connect_addr}"; then
+		$ipv6 || return 0
+		ping_args="${ping_args} -6"
+	fi
+
+	ip netns exec ${netns} ping ${ping_args} $connect_addr >/dev/null
+	if [ $? -ne 0 ] ; then
+		echo "$netns -> $connect_addr connectivity [ FAIL ]" 1>&2
+		ret=1
+		return 1
+	fi
+
+	return 0
+}
+
+do_ping_long()
+{
+	local netns="$1"
+	local connect_addr="$2"
+	local ping_args="-q -c 10"
+
+	if is_v6 "${connect_addr}"; then
+		$ipv6 || return 0
+		ping_args="${ping_args} -6"
+	fi
+
+	OUT="$(LANG=C ip netns exec ${netns} ping ${ping_args} $connect_addr | grep received)"
+	if [ $? -ne 0 ] ; then
+		echo "$netns -> $connect_addr ping [ FAIL ]" 1>&2
+		ret=1
+		return 1
+	fi
+
+	VAL="$(echo $OUT | cut -d' ' -f1-8)"
+	if [ "$VAL" != "10 packets transmitted, 10 received, 0% packet loss," ]
+	then
+		echo "$netns -> $connect_addr ping TEST [ FAIL ]"
+		echo "Expect to send and receive 10 packets and no duplicates."
+		echo "Full message: ${OUT}."
+		ret=1
+		return 1
+	fi
+
+	return 0
+}
+
+stop_if_error()
+{
+	local msg="$1"
+
+	if [ ${ret} -ne 0 ]; then
+		echo "FAIL: ${msg}" 1>&2
+		exit ${ret}
+	fi
+}
+
+do_complete_ping_test()
+{
+	echo "INFO: Initial validation ping."
+	# Each node has to be able each one.
+	do_ping "$ns1" 100.64.0.2
+	do_ping "$ns2" 100.64.0.1
+	do_ping "$ns3" 100.64.0.1
+	stop_if_error "Initial validation failed."
+
+	do_ping "$ns1" 100.64.0.3
+	do_ping "$ns2" 100.64.0.3
+	do_ping "$ns3" 100.64.0.2
+
+	do_ping "$ns1" dead:beef:1::2
+	do_ping "$ns1" dead:beef:1::3
+	do_ping "$ns2" dead:beef:1::1
+	do_ping "$ns2" dead:beef:1::2
+	do_ping "$ns3" dead:beef:1::1
+	do_ping "$ns3" dead:beef:1::2
+
+	stop_if_error "Initial validation failed."
+
+# Wait until supervisor all supervision frames have been processed and the node
+# entries have been merged. Otherwise duplicate frames will be observed which is
+# valid at this stage.
+	WAIT=5
+	while [ ${WAIT} -gt 0 ]
+	do
+		grep 00:00:00:00:00:00 /sys/kernel/debug/hsr/hsr*/node_table
+		if [ $? -ne 0 ]
+		then
+			break
+		fi
+		sleep 1
+		let "WAIT = WAIT - 1"
+	done
+
+# Just a safety delay in case the above check didn't handle it.
+	sleep 1
+
+	echo "INFO: Longer ping test."
+	do_ping_long "$ns1" 100.64.0.2
+	do_ping_long "$ns1" dead:beef:1::2
+	do_ping_long "$ns1" 100.64.0.3
+	do_ping_long "$ns1" dead:beef:1::3
+
+	stop_if_error "Longer ping test failed."
+
+	do_ping_long "$ns2" 100.64.0.1
+	do_ping_long "$ns2" dead:beef:1::1
+	do_ping_long "$ns2" 100.64.0.3
+	do_ping_long "$ns2" dead:beef:1::2
+	stop_if_error "Longer ping test failed."
+
+	do_ping_long "$ns3" 100.64.0.1
+	do_ping_long "$ns3" dead:beef:1::1
+	do_ping_long "$ns3" 100.64.0.2
+	do_ping_long "$ns3" dead:beef:1::2
+	stop_if_error "Longer ping test failed."
+
+	echo "INFO: Cutting one link."
+	do_ping_long "$ns1" 100.64.0.3 &
+
+	sleep 3
+	ip -net "$ns3" link set ns3eth1 down
+	wait
+
+	ip -net "$ns3" link set ns3eth1 up
+
+	stop_if_error "Failed with one link down."
+
+	echo "INFO: Delay the link and drop a few packages."
+	tc -net "$ns3" qdisc add dev ns3eth1 root netem delay 50ms
+	tc -net "$ns2" qdisc add dev ns2eth1 root netem delay 5ms loss 25%
+
+	do_ping_long "$ns1" 100.64.0.2
+	do_ping_long "$ns1" 100.64.0.3
+
+	stop_if_error "Failed with delay and packetloss."
+
+	do_ping_long "$ns2" 100.64.0.1
+	do_ping_long "$ns2" 100.64.0.3
+
+	stop_if_error "Failed with delay and packetloss."
+
+	do_ping_long "$ns3" 100.64.0.1
+	do_ping_long "$ns3" 100.64.0.2
+	stop_if_error "Failed with delay and packetloss."
+
+	echo "INFO: All good."
+}
+
+setup_hsr_interfaces()
+{
+	local HSRv="$1"
+
+	echo "INFO: preparing interfaces for HSRv${HSRv}."
+# Three HSR nodes. Each node has one link to each of its neighbour, two links in total.
+#
+#    ns1eth1 ----- ns2eth1
+#      hsr1         hsr2
+#    ns1eth2       ns2eth2
+#       |            |
+#    ns3eth1      ns3eth2
+#           \    /
+#            hsr3
+#
+	# Interfaces
+	ip link add ns1eth1 netns "$ns1" type veth peer name ns2eth1 netns "$ns2"
+	ip link add ns1eth2 netns "$ns1" type veth peer name ns3eth1 netns "$ns3"
+	ip link add ns3eth2 netns "$ns3" type veth peer name ns2eth2 netns "$ns2"
+
+	# HSRv0/1
+	ip -net "$ns1" link add name hsr1 type hsr slave1 ns1eth1 slave2 ns1eth2 supervision 45 version $HSRv proto 0
+	ip -net "$ns2" link add name hsr2 type hsr slave1 ns2eth1 slave2 ns2eth2 supervision 45 version $HSRv proto 0
+	ip -net "$ns3" link add name hsr3 type hsr slave1 ns3eth1 slave2 ns3eth2 supervision 45 version $HSRv proto 0
+
+	# IP for HSR
+	ip -net "$ns1" addr add 100.64.0.1/24 dev hsr1
+	ip -net "$ns1" addr add dead:beef:1::1/64 dev hsr1 nodad
+	ip -net "$ns2" addr add 100.64.0.2/24 dev hsr2
+	ip -net "$ns2" addr add dead:beef:1::2/64 dev hsr2 nodad
+	ip -net "$ns3" addr add 100.64.0.3/24 dev hsr3
+	ip -net "$ns3" addr add dead:beef:1::3/64 dev hsr3 nodad
+
+	# All Links up
+	ip -net "$ns1" link set ns1eth1 up
+	ip -net "$ns1" link set ns1eth2 up
+	ip -net "$ns1" link set hsr1 up
+
+	ip -net "$ns2" link set ns2eth1 up
+	ip -net "$ns2" link set ns2eth2 up
+	ip -net "$ns2" link set hsr2 up
+
+	ip -net "$ns3" link set ns3eth1 up
+	ip -net "$ns3" link set ns3eth2 up
+	ip -net "$ns3" link set hsr3 up
+}
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+trap cleanup EXIT
+
+for i in "$ns1" "$ns2" "$ns3" ;do
+	ip netns add $i || exit $ksft_skip
+	ip -net $i link set lo up
+done
+
+setup_hsr_interfaces 0
+do_complete_ping_test
+cleanup
+
+for i in "$ns1" "$ns2" "$ns3" ;do
+	ip netns add $i || exit $ksft_skip
+	ip -net $i link set lo up
+done
+
+setup_hsr_interfaces 1
+do_complete_ping_test
+
+exit $ret
diff --git a/tools/testing/selftests/net/hwtstamp_config.c b/tools/testing/selftests/net/hwtstamp_config.c
index e1fdee841021..170728c96c46 100644
--- a/tools/testing/selftests/net/hwtstamp_config.c
+++ b/tools/testing/selftests/net/hwtstamp_config.c
@@ -16,6 +16,8 @@
 #include <linux/net_tstamp.h>
 #include <linux/sockios.h>
 
+#include "kselftest.h"
+
 static int
 lookup_value(const char **names, int size, const char *name)
 {
@@ -50,7 +52,7 @@ static const char *tx_types[] = {
 	TX_TYPE(ONESTEP_SYNC)
 #undef TX_TYPE
 };
-#define N_TX_TYPES ((int)(sizeof(tx_types) / sizeof(tx_types[0])))
+#define N_TX_TYPES ((int)(ARRAY_SIZE(tx_types)))
 
 static const char *rx_filters[] = {
 #define RX_FILTER(name) [HWTSTAMP_FILTER_ ## name] = #name
@@ -71,7 +73,7 @@ static const char *rx_filters[] = {
 	RX_FILTER(PTP_V2_DELAY_REQ),
 #undef RX_FILTER
 };
-#define N_RX_FILTERS ((int)(sizeof(rx_filters) / sizeof(rx_filters[0])))
+#define N_RX_FILTERS ((int)(ARRAY_SIZE(rx_filters)))
 
 static void usage(void)
 {
diff --git a/tools/testing/selftests/net/icmp.sh b/tools/testing/selftests/net/icmp.sh
new file mode 100755
index 000000000000..824cb0e35eff
--- /dev/null
+++ b/tools/testing/selftests/net/icmp.sh
@@ -0,0 +1,72 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for checking ICMP response with dummy address instead of 0.0.0.0.
+# Sets up two namespaces like:
+# +----------------------+                          +--------------------+
+# | ns1                  |    v4-via-v6 routes:     | ns2                |
+# |                      |                  '       |                    |
+# |             +--------+   -> 172.16.1.0/24 ->    +--------+           |
+# |             | veth0  +--------------------------+  veth0 |           |
+# |             +--------+   <- 172.16.0.0/24 <-    +--------+           |
+# |           172.16.0.1 |                          | 2001:db8:1::2/64   |
+# |     2001:db8:1::2/64 |                          |                    |
+# +----------------------+                          +--------------------+
+#
+# And then tries to ping 172.16.1.1 from ns1. This results in a "net
+# unreachable" message being sent from ns2, but there is no IPv4 address set in
+# that address space, so the kernel should substitute the dummy address
+# 192.0.0.8 defined in RFC7600.
+
+source lib.sh
+
+H1_IP=172.16.0.1/32
+H1_IP6=2001:db8:1::1
+RT1=172.16.1.0/24
+PINGADDR=172.16.1.1
+RT2=172.16.0.0/24
+H2_IP6=2001:db8:1::2
+
+TMPFILE=$(mktemp)
+
+cleanup()
+{
+    rm -f "$TMPFILE"
+    cleanup_ns $NS1 $NS2
+}
+
+trap cleanup EXIT
+
+# Namespaces
+setup_ns NS1 NS2
+
+# Connectivity
+ip -netns $NS1 link add veth0 type veth peer name veth0 netns $NS2
+ip -netns $NS1 link set dev veth0 up
+ip -netns $NS2 link set dev veth0 up
+ip -netns $NS1 addr add $H1_IP dev veth0
+ip -netns $NS1 addr add $H1_IP6/64 dev veth0 nodad
+ip -netns $NS2 addr add $H2_IP6/64 dev veth0 nodad
+ip -netns $NS1 route add $RT1 via inet6 $H2_IP6
+ip -netns $NS2 route add $RT2 via inet6 $H1_IP6
+
+# Make sure ns2 will respond with ICMP unreachable
+ip netns exec $NS2 sysctl -qw net.ipv4.icmp_ratelimit=0 net.ipv4.ip_forward=1
+
+# Run the test - a ping runs in the background, and we capture ICMP responses
+# with tcpdump; -c 1 means it should exit on the first ping, but add a timeout
+# in case something goes wrong
+ip netns exec $NS1 ping -w 3 -i 0.5 $PINGADDR >/dev/null &
+ip netns exec $NS1 timeout 10 tcpdump -tpni veth0 -c 1 'icmp and icmp[icmptype] != icmp-echo' > $TMPFILE 2>/dev/null
+
+# Parse response and check for dummy address
+# tcpdump output looks like:
+# IP 192.0.0.8 > 172.16.0.1: ICMP net 172.16.1.1 unreachable, length 92
+RESP_IP=$(awk '{print $2}' < $TMPFILE)
+if [[ "$RESP_IP" != "192.0.0.8" ]]; then
+    echo "FAIL - got ICMP response from $RESP_IP, should be 192.0.0.8"
+    exit 1
+else
+    echo "OK"
+    exit 0
+fi
diff --git a/tools/testing/selftests/net/icmp_redirect.sh b/tools/testing/selftests/net/icmp_redirect.sh
index bf361f30d6ef..d6f0e449c029 100755
--- a/tools/testing/selftests/net/icmp_redirect.sh
+++ b/tools/testing/selftests/net/icmp_redirect.sh
@@ -19,6 +19,7 @@
 # Route on r1 changed to go to r2 via eth0. This causes a redirect to be sent
 # from r1 to h1 telling h1 to use r2 when talking to h2.
 
+source lib.sh
 VERBOSE=0
 PAUSE_ON_FAIL=no
 
@@ -63,10 +64,14 @@ log_test()
 	local rc=$1
 	local expected=$2
 	local msg="$3"
+	local xfail=$4
 
 	if [ ${rc} -eq ${expected} ]; then
 		printf "TEST: %-60s  [ OK ]\n" "${msg}"
 		nsuccess=$((nsuccess+1))
+	elif [ ${rc} -eq ${xfail} ]; then
+		printf "TEST: %-60s  [XFAIL]\n" "${msg}"
+		nxfail=$((nxfail+1))
 	else
 		ret=1
 		nfail=$((nfail+1))
@@ -136,11 +141,7 @@ get_linklocal()
 
 cleanup()
 {
-	local ns
-
-	for ns in h1 h2 r1 r2; do
-		ip netns del $ns 2>/dev/null
-	done
+	cleanup_ns $h1 $h2 $r1 $r2
 }
 
 create_vrf()
@@ -167,102 +168,99 @@ setup()
 
 	#
 	# create nodes as namespaces
-	#
-	for ns in h1 h2 r1 r2; do
-		ip netns add $ns
-		ip -netns $ns li set lo up
-
-		case "${ns}" in
-		h[12]) ip netns exec $ns sysctl -q -w net.ipv4.conf.all.accept_redirects=1
-		       ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0
-		       ip netns exec $ns sysctl -q -w net.ipv6.conf.all.accept_redirects=1
-		       ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1
-			;;
-		r[12]) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
-		       ip netns exec $ns sysctl -q -w net.ipv4.conf.all.send_redirects=1
-		       ip netns exec $ns sysctl -q -w net.ipv4.conf.default.rp_filter=0
-		       ip netns exec $ns sysctl -q -w net.ipv4.conf.all.rp_filter=0
-
-		       ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
-		       ip netns exec $ns sysctl -q -w net.ipv6.route.mtu_expires=10
-		esac
+	setup_ns h1 h2 r1 r2
+	for ns in $h1 $h2 $r1 $r2; do
+		if echo $ns | grep -q h[12]-; then
+			ip netns exec $ns sysctl -q -w net.ipv4.conf.all.accept_redirects=1
+			ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0
+			ip netns exec $ns sysctl -q -w net.ipv6.conf.all.accept_redirects=1
+			ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1
+		else
+			ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
+			ip netns exec $ns sysctl -q -w net.ipv4.conf.all.send_redirects=1
+			ip netns exec $ns sysctl -q -w net.ipv4.conf.default.rp_filter=0
+			ip netns exec $ns sysctl -q -w net.ipv4.conf.all.rp_filter=0
+
+			ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
+			ip netns exec $ns sysctl -q -w net.ipv6.route.mtu_expires=10
+		fi
 	done
 
 	#
 	# create interconnects
 	#
-	ip -netns h1 li add eth0 type veth peer name r1h1
-	ip -netns h1 li set r1h1 netns r1 name eth0 up
+	ip -netns $h1 li add eth0 type veth peer name r1h1
+	ip -netns $h1 li set r1h1 netns $r1 name eth0 up
 
-	ip -netns h1 li add eth1 type veth peer name r2h1
-	ip -netns h1 li set r2h1 netns r2 name eth0 up
+	ip -netns $h1 li add eth1 type veth peer name r2h1
+	ip -netns $h1 li set r2h1 netns $r2 name eth0 up
 
-	ip -netns h2 li add eth0 type veth peer name r2h2
-	ip -netns h2 li set eth0 up
-	ip -netns h2 li set r2h2 netns r2 name eth2 up
+	ip -netns $h2 li add eth0 type veth peer name r2h2
+	ip -netns $h2 li set eth0 up
+	ip -netns $h2 li set r2h2 netns $r2 name eth2 up
 
-	ip -netns r1 li add eth1 type veth peer name r2r1
-	ip -netns r1 li set eth1 up
-	ip -netns r1 li set r2r1 netns r2 name eth1 up
+	ip -netns $r1 li add eth1 type veth peer name r2r1
+	ip -netns $r1 li set eth1 up
+	ip -netns $r1 li set r2r1 netns $r2 name eth1 up
 
 	#
 	# h1
 	#
 	if [ "${WITH_VRF}" = "yes" ]; then
-		create_vrf "h1"
+		create_vrf "$h1"
 		H1_VRF_ARG="vrf ${VRF}"
 		H1_PING_ARG="-I ${VRF}"
 	else
 		H1_VRF_ARG=
 		H1_PING_ARG=
 	fi
-	ip -netns h1 li add br0 type bridge
+	ip -netns $h1 li add br0 type bridge
 	if [ "${WITH_VRF}" = "yes" ]; then
-		ip -netns h1 li set br0 vrf ${VRF} up
+		ip -netns $h1 li set br0 vrf ${VRF} up
 	else
-		ip -netns h1 li set br0 up
+		ip -netns $h1 li set br0 up
 	fi
-	ip -netns h1 addr add dev br0 ${H1_N1_IP}/24
-	ip -netns h1 -6 addr add dev br0 ${H1_N1_IP6}/64 nodad
-	ip -netns h1 li set eth0 master br0 up
-	ip -netns h1 li set eth1 master br0 up
+	ip -netns $h1 addr add dev br0 ${H1_N1_IP}/24
+	ip -netns $h1 -6 addr add dev br0 ${H1_N1_IP6}/64 nodad
+	ip -netns $h1 li set eth0 master br0 up
+	ip -netns $h1 li set eth1 master br0 up
 
 	#
 	# h2
 	#
-	ip -netns h2 addr add dev eth0 ${H2_N2_IP}/24
-	ip -netns h2 ro add default via ${R2_N2_IP} dev eth0
-	ip -netns h2 -6 addr add dev eth0 ${H2_N2_IP6}/64 nodad
-	ip -netns h2 -6 ro add default via ${R2_N2_IP6} dev eth0
+	ip -netns $h2 addr add dev eth0 ${H2_N2_IP}/24
+	ip -netns $h2 ro add default via ${R2_N2_IP} dev eth0
+	ip -netns $h2 -6 addr add dev eth0 ${H2_N2_IP6}/64 nodad
+	ip -netns $h2 -6 ro add default via ${R2_N2_IP6} dev eth0
 
 	#
 	# r1
 	#
-	ip -netns r1 addr add dev eth0 ${R1_N1_IP}/24
-	ip -netns r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad
-	ip -netns r1 addr add dev eth1 ${R1_R2_N1_IP}/30
-	ip -netns r1 -6 addr add dev eth1 ${R1_R2_N1_IP6}/126 nodad
+	ip -netns $r1 addr add dev eth0 ${R1_N1_IP}/24
+	ip -netns $r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad
+	ip -netns $r1 addr add dev eth1 ${R1_R2_N1_IP}/30
+	ip -netns $r1 -6 addr add dev eth1 ${R1_R2_N1_IP6}/126 nodad
 
 	#
 	# r2
 	#
-	ip -netns r2 addr add dev eth0 ${R2_N1_IP}/24
-	ip -netns r2 -6 addr add dev eth0 ${R2_N1_IP6}/64 nodad
-	ip -netns r2 addr add dev eth1 ${R2_R1_N1_IP}/30
-	ip -netns r2 -6 addr add dev eth1 ${R2_R1_N1_IP6}/126 nodad
-	ip -netns r2 addr add dev eth2 ${R2_N2_IP}/24
-	ip -netns r2 -6 addr add dev eth2 ${R2_N2_IP6}/64 nodad
+	ip -netns $r2 addr add dev eth0 ${R2_N1_IP}/24
+	ip -netns $r2 -6 addr add dev eth0 ${R2_N1_IP6}/64 nodad
+	ip -netns $r2 addr add dev eth1 ${R2_R1_N1_IP}/30
+	ip -netns $r2 -6 addr add dev eth1 ${R2_R1_N1_IP6}/126 nodad
+	ip -netns $r2 addr add dev eth2 ${R2_N2_IP}/24
+	ip -netns $r2 -6 addr add dev eth2 ${R2_N2_IP6}/64 nodad
 
 	sleep 2
 
-	R1_LLADDR=$(get_linklocal r1 eth0)
+	R1_LLADDR=$(get_linklocal $r1 eth0)
 	if [ $? -ne 0 ]; then
 		echo "Error: Failed to get link-local address of r1's eth0"
 		exit 1
 	fi
 	log_debug "initial gateway is R1's lladdr = ${R1_LLADDR}"
 
-	R2_LLADDR=$(get_linklocal r2 eth0)
+	R2_LLADDR=$(get_linklocal $r2 eth0)
 	if [ $? -ne 0 ]; then
 		echo "Error: Failed to get link-local address of r2's eth0"
 		exit 1
@@ -274,8 +272,8 @@ change_h2_mtu()
 {
 	local mtu=$1
 
-	run_cmd ip -netns h2 li set eth0 mtu ${mtu}
-	run_cmd ip -netns r2 li set eth2 mtu ${mtu}
+	run_cmd ip -netns $h2 li set eth0 mtu ${mtu}
+	run_cmd ip -netns $r2 li set eth2 mtu ${mtu}
 }
 
 check_exception()
@@ -287,63 +285,64 @@ check_exception()
 	# From 172.16.1.101: icmp_seq=1 Redirect Host(New nexthop: 172.16.1.102)
 	if [ "$VERBOSE" = "1" ]; then
 		echo "Commands to check for exception:"
-		run_cmd ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP}
-		run_cmd ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6}
+		run_cmd ip -netns $h1 ro get ${H1_VRF_ARG} ${H2_N2_IP}
+		run_cmd ip -netns $h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6}
 	fi
 
 	if [ -n "${mtu}" ]; then
 		mtu=" mtu ${mtu}"
 	fi
 	if [ "$with_redirect" = "yes" ]; then
-		ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \
+		ip -netns $h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \
 		grep -q "cache <redirected> expires [0-9]*sec${mtu}"
 	elif [ -n "${mtu}" ]; then
-		ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \
+		ip -netns $h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \
 		grep -q "cache expires [0-9]*sec${mtu}"
 	else
 		# want to verify that neither mtu nor redirected appears in
 		# the route get output. The -v will wipe out the cache line
 		# if either are set so the last grep -q will not find a match
-		ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \
+		ip -netns $h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \
 		grep -E -v 'mtu|redirected' | grep -q "cache"
 	fi
-	log_test $? 0 "IPv4: ${desc}"
+	log_test $? 0 "IPv4: ${desc}" 0
 
-	if [ "$with_redirect" = "yes" ]; then
-		ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \
-		grep -q "${H2_N2_IP6} from :: via ${R2_LLADDR} dev br0.*${mtu}"
+	# No PMTU info for test "redirect" and "mtu exception plus redirect"
+	if [ "$with_redirect" = "yes" ] && [ "$desc" != "redirect exception plus mtu" ]; then
+		ip -netns $h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \
+		grep -v "mtu" | grep -q "${H2_N2_IP6} .*via ${R2_LLADDR} dev br0"
 	elif [ -n "${mtu}" ]; then
-		ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \
+		ip -netns $h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \
 		grep -q "${mtu}"
 	else
 		# IPv6 is a bit harder. First strip out the match if it
 		# contains an mtu exception and then look for the first
 		# gateway - R1's lladdr
-		ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \
+		ip -netns $h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \
 		grep -v "mtu" | grep -q "${R1_LLADDR}"
 	fi
-	log_test $? 0 "IPv6: ${desc}"
+	log_test $? 0 "IPv6: ${desc}" 1
 }
 
 run_ping()
 {
 	local sz=$1
 
-	run_cmd ip netns exec h1 ping -q -M want -i 0.5 -c 10 -w 2 -s ${sz} ${H1_PING_ARG} ${H2_N2_IP}
-	run_cmd ip netns exec h1 ${ping6} -q -M want -i 0.5 -c 10 -w 2 -s ${sz} ${H1_PING_ARG} ${H2_N2_IP6}
+	run_cmd ip netns exec $h1 ping -q -M want -i 0.5 -c 10 -w 2 -s ${sz} ${H1_PING_ARG} ${H2_N2_IP}
+	run_cmd ip netns exec $h1 ${ping6} -q -M want -i 0.5 -c 10 -w 2 -s ${sz} ${H1_PING_ARG} ${H2_N2_IP6}
 }
 
 replace_route_new()
 {
 	# r1 to h2 via r2 and eth0
-	run_cmd ip -netns r1 nexthop replace id 1 via ${R2_N1_IP} dev eth0
-	run_cmd ip -netns r1 nexthop replace id 2 via ${R2_LLADDR} dev eth0
+	run_cmd ip -netns $r1 nexthop replace id 1 via ${R2_N1_IP} dev eth0
+	run_cmd ip -netns $r1 nexthop replace id 2 via ${R2_LLADDR} dev eth0
 }
 
 reset_route_new()
 {
-	run_cmd ip -netns r1 nexthop flush
-	run_cmd ip -netns h1 nexthop flush
+	run_cmd ip -netns $r1 nexthop flush
+	run_cmd ip -netns $h1 nexthop flush
 
 	initial_route_new
 }
@@ -351,34 +350,34 @@ reset_route_new()
 initial_route_new()
 {
 	# r1 to h2 via r2 and eth1
-	run_cmd ip -netns r1 nexthop add id 1 via ${R2_R1_N1_IP} dev eth1
-	run_cmd ip -netns r1 ro add ${H2_N2} nhid 1
+	run_cmd ip -netns $r1 nexthop add id 1 via ${R2_R1_N1_IP} dev eth1
+	run_cmd ip -netns $r1 ro add ${H2_N2} nhid 1
 
-	run_cmd ip -netns r1 nexthop add id 2 via ${R2_R1_N1_IP6} dev eth1
-	run_cmd ip -netns r1 -6 ro add ${H2_N2_6} nhid 2
+	run_cmd ip -netns $r1 nexthop add id 2 via ${R2_R1_N1_IP6} dev eth1
+	run_cmd ip -netns $r1 -6 ro add ${H2_N2_6} nhid 2
 
 	# h1 to h2 via r1
-	run_cmd ip -netns h1 nexthop add id 1 via ${R1_N1_IP} dev br0
-	run_cmd ip -netns h1 ro add ${H1_VRF_ARG} ${H2_N2} nhid 1
+	run_cmd ip -netns $h1 nexthop add id 1 via ${R1_N1_IP} dev br0
+	run_cmd ip -netns $h1 ro add ${H1_VRF_ARG} ${H2_N2} nhid 1
 
-	run_cmd ip -netns h1 nexthop add id 2 via ${R1_LLADDR} dev br0
-	run_cmd ip -netns h1 -6 ro add ${H1_VRF_ARG} ${H2_N2_6} nhid 2
+	run_cmd ip -netns $h1 nexthop add id 2 via ${R1_LLADDR} dev br0
+	run_cmd ip -netns $h1 -6 ro add ${H1_VRF_ARG} ${H2_N2_6} nhid 2
 }
 
 replace_route_legacy()
 {
 	# r1 to h2 via r2 and eth0
-	run_cmd ip -netns r1    ro replace ${H2_N2}   via ${R2_N1_IP}  dev eth0
-	run_cmd ip -netns r1 -6 ro replace ${H2_N2_6} via ${R2_LLADDR} dev eth0
+	run_cmd ip -netns $r1    ro replace ${H2_N2}   via ${R2_N1_IP}  dev eth0
+	run_cmd ip -netns $r1 -6 ro replace ${H2_N2_6} via ${R2_LLADDR} dev eth0
 }
 
 reset_route_legacy()
 {
-	run_cmd ip -netns r1    ro del ${H2_N2}
-	run_cmd ip -netns r1 -6 ro del ${H2_N2_6}
+	run_cmd ip -netns $r1    ro del ${H2_N2}
+	run_cmd ip -netns $r1 -6 ro del ${H2_N2_6}
 
-	run_cmd ip -netns h1    ro del ${H1_VRF_ARG} ${H2_N2}
-	run_cmd ip -netns h1 -6 ro del ${H1_VRF_ARG} ${H2_N2_6}
+	run_cmd ip -netns $h1    ro del ${H1_VRF_ARG} ${H2_N2}
+	run_cmd ip -netns $h1 -6 ro del ${H1_VRF_ARG} ${H2_N2_6}
 
 	initial_route_legacy
 }
@@ -386,22 +385,22 @@ reset_route_legacy()
 initial_route_legacy()
 {
 	# r1 to h2 via r2 and eth1
-	run_cmd ip -netns r1    ro add ${H2_N2}   via ${R2_R1_N1_IP}  dev eth1
-	run_cmd ip -netns r1 -6 ro add ${H2_N2_6} via ${R2_R1_N1_IP6} dev eth1
+	run_cmd ip -netns $r1    ro add ${H2_N2}   via ${R2_R1_N1_IP}  dev eth1
+	run_cmd ip -netns $r1 -6 ro add ${H2_N2_6} via ${R2_R1_N1_IP6} dev eth1
 
 	# h1 to h2 via r1
 	# - IPv6 redirect only works if gateway is the LLA
-	run_cmd ip -netns h1    ro add ${H1_VRF_ARG} ${H2_N2} via ${R1_N1_IP} dev br0
-	run_cmd ip -netns h1 -6 ro add ${H1_VRF_ARG} ${H2_N2_6} via ${R1_LLADDR} dev br0
+	run_cmd ip -netns $h1    ro add ${H1_VRF_ARG} ${H2_N2} via ${R1_N1_IP} dev br0
+	run_cmd ip -netns $h1 -6 ro add ${H1_VRF_ARG} ${H2_N2_6} via ${R1_LLADDR} dev br0
 }
 
 check_connectivity()
 {
 	local rc
 
-	run_cmd ip netns exec h1 ping -c1 -w1 ${H1_PING_ARG} ${H2_N2_IP}
+	run_cmd ip netns exec $h1 ping -c1 -w1 ${H1_PING_ARG} ${H2_N2_IP}
 	rc=$?
-	run_cmd ip netns exec h1 ${ping6} -c1 -w1 ${H1_PING_ARG} ${H2_N2_IP6}
+	run_cmd ip netns exec $h1 ${ping6} -c1 -w1 ${H1_PING_ARG} ${H2_N2_IP6}
 	[ $? -ne 0 ] && rc=$?
 
 	return $rc
@@ -488,6 +487,7 @@ which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
 ret=0
 nsuccess=0
 nfail=0
+nxfail=0
 
 while getopts :pv o
 do
@@ -532,5 +532,6 @@ fi
 
 printf "\nTests passed: %3d\n" ${nsuccess}
 printf "Tests failed: %3d\n"   ${nfail}
+printf "Tests xfailed: %3d\n"  ${nxfail}
 
 exit $ret
diff --git a/tools/testing/selftests/net/io_uring_zerocopy_tx.c b/tools/testing/selftests/net/io_uring_zerocopy_tx.c
new file mode 100644
index 000000000000..76e604e4810e
--- /dev/null
+++ b/tools/testing/selftests/net/io_uring_zerocopy_tx.c
@@ -0,0 +1,320 @@
+/* SPDX-License-Identifier: MIT */
+/* based on linux-kernel/tools/testing/selftests/net/msg_zerocopy.c */
+#include <assert.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <arpa/inet.h>
+#include <linux/errqueue.h>
+#include <linux/if_packet.h>
+#include <linux/io_uring.h>
+#include <linux/ipv6.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <sys/wait.h>
+
+#include <io_uring/mini_liburing.h>
+
+#define NOTIF_TAG 0xfffffffULL
+#define NONZC_TAG 0
+#define ZC_TAG 1
+
+enum {
+	MODE_NONZC	= 0,
+	MODE_ZC		= 1,
+	MODE_ZC_FIXED	= 2,
+	MODE_MIXED	= 3,
+};
+
+static bool cfg_cork		= false;
+static int  cfg_mode		= MODE_ZC_FIXED;
+static int  cfg_nr_reqs		= 8;
+static int  cfg_family		= PF_UNSPEC;
+static int  cfg_payload_len;
+static int  cfg_port		= 8000;
+static int  cfg_runtime_ms	= 4200;
+
+static socklen_t cfg_alen;
+static struct sockaddr_storage cfg_dst_addr;
+
+static char payload[IP_MAXPACKET] __attribute__((aligned(4096)));
+
+static unsigned long gettimeofday_ms(void)
+{
+	struct timeval tv;
+
+	gettimeofday(&tv, NULL);
+	return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
+}
+
+static void do_setsockopt(int fd, int level, int optname, int val)
+{
+	if (setsockopt(fd, level, optname, &val, sizeof(val)))
+		error(1, errno, "setsockopt %d.%d: %d", level, optname, val);
+}
+
+static int do_setup_tx(int domain, int type, int protocol)
+{
+	int fd;
+
+	fd = socket(domain, type, protocol);
+	if (fd == -1)
+		error(1, errno, "socket t");
+
+	do_setsockopt(fd, SOL_SOCKET, SO_SNDBUF, 1 << 21);
+
+	if (connect(fd, (void *) &cfg_dst_addr, cfg_alen))
+		error(1, errno, "connect");
+	return fd;
+}
+
+static void do_tx(int domain, int type, int protocol)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	unsigned long packets = 0, bytes = 0;
+	struct io_uring ring;
+	struct iovec iov;
+	uint64_t tstop;
+	int i, fd, ret;
+	int compl_cqes = 0;
+
+	fd = do_setup_tx(domain, type, protocol);
+
+	ret = io_uring_queue_init(512, &ring, 0);
+	if (ret)
+		error(1, ret, "io_uring: queue init");
+
+	iov.iov_base = payload;
+	iov.iov_len = cfg_payload_len;
+
+	ret = io_uring_register_buffers(&ring, &iov, 1);
+	if (ret)
+		error(1, ret, "io_uring: buffer registration");
+
+	tstop = gettimeofday_ms() + cfg_runtime_ms;
+	do {
+		if (cfg_cork)
+			do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 1);
+
+		for (i = 0; i < cfg_nr_reqs; i++) {
+			unsigned zc_flags = 0;
+			unsigned buf_idx = 0;
+			unsigned mode = cfg_mode;
+			unsigned msg_flags = MSG_WAITALL;
+
+			if (cfg_mode == MODE_MIXED)
+				mode = rand() % 3;
+
+			sqe = io_uring_get_sqe(&ring);
+
+			if (mode == MODE_NONZC) {
+				io_uring_prep_send(sqe, fd, payload,
+						   cfg_payload_len, msg_flags);
+				sqe->user_data = NONZC_TAG;
+			} else {
+				io_uring_prep_sendzc(sqe, fd, payload,
+						     cfg_payload_len,
+						     msg_flags, zc_flags);
+				if (mode == MODE_ZC_FIXED) {
+					sqe->ioprio |= IORING_RECVSEND_FIXED_BUF;
+					sqe->buf_index = buf_idx;
+				}
+				sqe->user_data = ZC_TAG;
+			}
+		}
+
+		ret = io_uring_submit(&ring);
+		if (ret != cfg_nr_reqs)
+			error(1, ret, "submit");
+
+		if (cfg_cork)
+			do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0);
+		for (i = 0; i < cfg_nr_reqs; i++) {
+			ret = io_uring_wait_cqe(&ring, &cqe);
+			if (ret)
+				error(1, ret, "wait cqe");
+
+			if (cqe->user_data != NONZC_TAG &&
+			    cqe->user_data != ZC_TAG)
+				error(1, -EINVAL, "invalid cqe->user_data");
+
+			if (cqe->flags & IORING_CQE_F_NOTIF) {
+				if (cqe->flags & IORING_CQE_F_MORE)
+					error(1, -EINVAL, "invalid notif flags");
+				if (compl_cqes <= 0)
+					error(1, -EINVAL, "notification mismatch");
+				compl_cqes--;
+				i--;
+				io_uring_cqe_seen(&ring);
+				continue;
+			}
+			if (cqe->flags & IORING_CQE_F_MORE) {
+				if (cqe->user_data != ZC_TAG)
+					error(1, cqe->res, "unexpected F_MORE");
+				compl_cqes++;
+			}
+			if (cqe->res >= 0) {
+				packets++;
+				bytes += cqe->res;
+			} else if (cqe->res != -EAGAIN) {
+				error(1, cqe->res, "send failed");
+			}
+			io_uring_cqe_seen(&ring);
+		}
+	} while (gettimeofday_ms() < tstop);
+
+	while (compl_cqes) {
+		ret = io_uring_wait_cqe(&ring, &cqe);
+		if (ret)
+			error(1, ret, "wait cqe");
+		if (cqe->flags & IORING_CQE_F_MORE)
+			error(1, -EINVAL, "invalid notif flags");
+		if (!(cqe->flags & IORING_CQE_F_NOTIF))
+			error(1, -EINVAL, "missing notif flag");
+
+		io_uring_cqe_seen(&ring);
+		compl_cqes--;
+	}
+
+	fprintf(stderr, "tx=%lu (MB=%lu), tx/s=%lu (MB/s=%lu)\n",
+			packets, bytes >> 20,
+			packets / (cfg_runtime_ms / 1000),
+			(bytes >> 20) / (cfg_runtime_ms / 1000));
+
+	if (close(fd))
+		error(1, errno, "close");
+}
+
+static void do_test(int domain, int type, int protocol)
+{
+	int i;
+
+	for (i = 0; i < IP_MAXPACKET; i++)
+		payload[i] = 'a' + (i % 26);
+	do_tx(domain, type, protocol);
+}
+
+static void usage(const char *filepath)
+{
+	error(1, 0, "Usage: %s (-4|-6) (udp|tcp) -D<dst_ip> [-s<payload size>] "
+		    "[-t<time s>] [-n<batch>] [-p<port>] [-m<mode>]", filepath);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	const int max_payload_len = sizeof(payload) -
+				    sizeof(struct ipv6hdr) -
+				    sizeof(struct tcphdr) -
+				    40 /* max tcp options */;
+	struct sockaddr_in6 *addr6 = (void *) &cfg_dst_addr;
+	struct sockaddr_in *addr4 = (void *) &cfg_dst_addr;
+	char *daddr = NULL;
+	int c;
+
+	if (argc <= 1)
+		usage(argv[0]);
+	cfg_payload_len = max_payload_len;
+
+	while ((c = getopt(argc, argv, "46D:p:s:t:n:c:m:")) != -1) {
+		switch (c) {
+		case '4':
+			if (cfg_family != PF_UNSPEC)
+				error(1, 0, "Pass one of -4 or -6");
+			cfg_family = PF_INET;
+			cfg_alen = sizeof(struct sockaddr_in);
+			break;
+		case '6':
+			if (cfg_family != PF_UNSPEC)
+				error(1, 0, "Pass one of -4 or -6");
+			cfg_family = PF_INET6;
+			cfg_alen = sizeof(struct sockaddr_in6);
+			break;
+		case 'D':
+			daddr = optarg;
+			break;
+		case 'p':
+			cfg_port = strtoul(optarg, NULL, 0);
+			break;
+		case 's':
+			cfg_payload_len = strtoul(optarg, NULL, 0);
+			break;
+		case 't':
+			cfg_runtime_ms = 200 + strtoul(optarg, NULL, 10) * 1000;
+			break;
+		case 'n':
+			cfg_nr_reqs = strtoul(optarg, NULL, 0);
+			break;
+		case 'c':
+			cfg_cork = strtol(optarg, NULL, 0);
+			break;
+		case 'm':
+			cfg_mode = strtol(optarg, NULL, 0);
+			break;
+		}
+	}
+
+	switch (cfg_family) {
+	case PF_INET:
+		memset(addr4, 0, sizeof(*addr4));
+		addr4->sin_family = AF_INET;
+		addr4->sin_port = htons(cfg_port);
+		if (daddr &&
+		    inet_pton(AF_INET, daddr, &(addr4->sin_addr)) != 1)
+			error(1, 0, "ipv4 parse error: %s", daddr);
+		break;
+	case PF_INET6:
+		memset(addr6, 0, sizeof(*addr6));
+		addr6->sin6_family = AF_INET6;
+		addr6->sin6_port = htons(cfg_port);
+		if (daddr &&
+		    inet_pton(AF_INET6, daddr, &(addr6->sin6_addr)) != 1)
+			error(1, 0, "ipv6 parse error: %s", daddr);
+		break;
+	default:
+		error(1, 0, "illegal domain");
+	}
+
+	if (cfg_payload_len > max_payload_len)
+		error(1, 0, "-s: payload exceeds max (%d)", max_payload_len);
+	if (optind != argc - 1)
+		usage(argv[0]);
+}
+
+int main(int argc, char **argv)
+{
+	const char *cfg_test = argv[argc - 1];
+
+	parse_opts(argc, argv);
+
+	if (!strcmp(cfg_test, "tcp"))
+		do_test(cfg_family, SOCK_STREAM, 0);
+	else if (!strcmp(cfg_test, "udp"))
+		do_test(cfg_family, SOCK_DGRAM, 0);
+	else
+		error(1, 0, "unknown cfg_test %s", cfg_test);
+	return 0;
+}
diff --git a/tools/testing/selftests/net/io_uring_zerocopy_tx.sh b/tools/testing/selftests/net/io_uring_zerocopy_tx.sh
new file mode 100755
index 000000000000..123439545013
--- /dev/null
+++ b/tools/testing/selftests/net/io_uring_zerocopy_tx.sh
@@ -0,0 +1,126 @@
+#!/bin/bash
+#
+# Send data between two processes across namespaces
+# Run twice: once without and once with zerocopy
+
+set -e
+
+readonly DEV="veth0"
+readonly DEV_MTU=65535
+readonly BIN_TX="./io_uring_zerocopy_tx"
+readonly BIN_RX="./msg_zerocopy"
+
+readonly RAND="$(mktemp -u XXXXXX)"
+readonly NSPREFIX="ns-${RAND}"
+readonly NS1="${NSPREFIX}1"
+readonly NS2="${NSPREFIX}2"
+
+readonly SADDR4='192.168.1.1'
+readonly DADDR4='192.168.1.2'
+readonly SADDR6='fd::1'
+readonly DADDR6='fd::2'
+
+readonly path_sysctl_mem="net.core.optmem_max"
+
+# No arguments: automated test
+if [[ "$#" -eq "0" ]]; then
+	IPs=( "4" "6" )
+
+	for IP in "${IPs[@]}"; do
+		for mode in $(seq 1 3); do
+			$0 "$IP" udp -m "$mode" -t 1 -n 32
+			$0 "$IP" tcp -m "$mode" -t 1 -n 1
+		done
+	done
+
+	echo "OK. All tests passed"
+	exit 0
+fi
+
+# Argument parsing
+if [[ "$#" -lt "2" ]]; then
+	echo "Usage: $0 [4|6] [tcp|udp|raw|raw_hdrincl|packet|packet_dgram] <args>"
+	exit 1
+fi
+
+readonly IP="$1"
+shift
+readonly TXMODE="$1"
+shift
+readonly EXTRA_ARGS="$@"
+
+# Argument parsing: configure addresses
+if [[ "${IP}" == "4" ]]; then
+	readonly SADDR="${SADDR4}"
+	readonly DADDR="${DADDR4}"
+elif [[ "${IP}" == "6" ]]; then
+	readonly SADDR="${SADDR6}"
+	readonly DADDR="${DADDR6}"
+else
+	echo "Invalid IP version ${IP}"
+	exit 1
+fi
+
+# Argument parsing: select receive mode
+#
+# This differs from send mode for
+# - packet:	use raw recv, because packet receives skb clones
+# - raw_hdrinc: use raw recv, because hdrincl is a tx-only option
+case "${TXMODE}" in
+'packet' | 'packet_dgram' | 'raw_hdrincl')
+	RXMODE='raw'
+	;;
+*)
+	RXMODE="${TXMODE}"
+	;;
+esac
+
+# Start of state changes: install cleanup handler
+
+cleanup() {
+	ip netns del "${NS2}"
+	ip netns del "${NS1}"
+}
+
+trap cleanup EXIT
+
+# Create virtual ethernet pair between network namespaces
+ip netns add "${NS1}"
+ip netns add "${NS2}"
+
+# Configure system settings
+ip netns exec "${NS1}" sysctl -w -q "${path_sysctl_mem}=1000000"
+ip netns exec "${NS2}" sysctl -w -q "${path_sysctl_mem}=1000000"
+
+ip link add "${DEV}" mtu "${DEV_MTU}" netns "${NS1}" type veth \
+  peer name "${DEV}" mtu "${DEV_MTU}" netns "${NS2}"
+
+# Bring the devices up
+ip -netns "${NS1}" link set "${DEV}" up
+ip -netns "${NS2}" link set "${DEV}" up
+
+# Set fixed MAC addresses on the devices
+ip -netns "${NS1}" link set dev "${DEV}" address 02:02:02:02:02:02
+ip -netns "${NS2}" link set dev "${DEV}" address 06:06:06:06:06:06
+
+# Add fixed IP addresses to the devices
+ip -netns "${NS1}" addr add 192.168.1.1/24 dev "${DEV}"
+ip -netns "${NS2}" addr add 192.168.1.2/24 dev "${DEV}"
+ip -netns "${NS1}" addr add       fd::1/64 dev "${DEV}" nodad
+ip -netns "${NS2}" addr add       fd::2/64 dev "${DEV}" nodad
+
+# Optionally disable sg or csum offload to test edge cases
+# ip netns exec "${NS1}" ethtool -K "${DEV}" sg off
+
+do_test() {
+	local readonly ARGS="$1"
+
+	echo "ipv${IP} ${TXMODE} ${ARGS}"
+	ip netns exec "${NS2}" "${BIN_RX}" "-${IP}" -t 2 -C 2 -S "${SADDR}" -D "${DADDR}" -r "${RXMODE}" &
+	sleep 0.2
+	ip netns exec "${NS1}" "${BIN_TX}" "-${IP}" -t 1 -D "${DADDR}" ${ARGS} "${TXMODE}"
+	wait
+}
+
+do_test "${EXTRA_ARGS}"
+echo ok
diff --git a/tools/testing/selftests/net/ioam6.sh b/tools/testing/selftests/net/ioam6.sh
new file mode 100755
index 000000000000..12491850ae98
--- /dev/null
+++ b/tools/testing/selftests/net/ioam6.sh
@@ -0,0 +1,771 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Author: Justin Iurman <justin.iurman@uliege.be>
+#
+# This script evaluates the IOAM insertion for IPv6 by checking the IOAM data
+# consistency directly inside packets on the receiver side. Tests are divided
+# into three categories: OUTPUT (evaluates the IOAM processing by the sender),
+# INPUT (evaluates the IOAM processing by a receiver) and GLOBAL (evaluates
+# wider use cases that do not fall into the other two categories). Both OUTPUT
+# and INPUT tests only use a two-node topology (alpha and beta), while GLOBAL
+# tests use the entire three-node topology (alpha, beta, gamma). Each test is
+# documented inside its own handler in the code below.
+#
+# An IOAM domain is configured from Alpha to Gamma but not on the reverse path.
+# When either Beta or Gamma is the destination (depending on the test category),
+# Alpha adds an IOAM option (Pre-allocated Trace) inside a Hop-by-hop.
+#
+#
+#            +-------------------+            +-------------------+
+#            |                   |            |                   |
+#            |    Alpha netns    |            |    Gamma netns    |
+#            |                   |            |                   |
+#            |  +-------------+  |            |  +-------------+  |
+#            |  |    veth0    |  |            |  |    veth0    |  |
+#            |  |  db01::2/64 |  |            |  |  db02::2/64 |  |
+#            |  +-------------+  |            |  +-------------+  |
+#            |         .         |            |         .         |
+#            +-------------------+            +-------------------+
+#                      .                                .
+#                      .                                .
+#                      .                                .
+#            +----------------------------------------------------+
+#            |         .                                .         |
+#            |  +-------------+                  +-------------+  |
+#            |  |    veth0    |                  |    veth1    |  |
+#            |  |  db01::1/64 | ................ |  db02::1/64 |  |
+#            |  +-------------+                  +-------------+  |
+#            |                                                    |
+#            |                      Beta netns                    |
+#            |                                                    |
+#            +----------------------------------------------------+
+#
+#
+#
+#        =============================================================
+#        |                Alpha - IOAM configuration                 |
+#        +===========================================================+
+#        | Node ID             | 1                                   |
+#        +-----------------------------------------------------------+
+#        | Node Wide ID        | 11111111                            |
+#        +-----------------------------------------------------------+
+#        | Ingress ID          | 0xffff (default value)              |
+#        +-----------------------------------------------------------+
+#        | Ingress Wide ID     | 0xffffffff (default value)          |
+#        +-----------------------------------------------------------+
+#        | Egress ID           | 101                                 |
+#        +-----------------------------------------------------------+
+#        | Egress Wide ID      | 101101                              |
+#        +-----------------------------------------------------------+
+#        | Namespace Data      | 0xdeadbee0                          |
+#        +-----------------------------------------------------------+
+#        | Namespace Wide Data | 0xcafec0caf00dc0de                  |
+#        +-----------------------------------------------------------+
+#        | Schema ID           | 777                                 |
+#        +-----------------------------------------------------------+
+#        | Schema Data         | something that will be 4n-aligned   |
+#        +-----------------------------------------------------------+
+#
+#
+#        =============================================================
+#        |                 Beta - IOAM configuration                 |
+#        +===========================================================+
+#        | Node ID             | 2                                   |
+#        +-----------------------------------------------------------+
+#        | Node Wide ID        | 22222222                            |
+#        +-----------------------------------------------------------+
+#        | Ingress ID          | 201                                 |
+#        +-----------------------------------------------------------+
+#        | Ingress Wide ID     | 201201                              |
+#        +-----------------------------------------------------------+
+#        | Egress ID           | 202                                 |
+#        +-----------------------------------------------------------+
+#        | Egress Wide ID      | 202202                              |
+#        +-----------------------------------------------------------+
+#        | Namespace Data      | 0xdeadbee1                          |
+#        +-----------------------------------------------------------+
+#        | Namespace Wide Data | 0xcafec0caf11dc0de                  |
+#        +-----------------------------------------------------------+
+#        | Schema ID           | 666                                 |
+#        +-----------------------------------------------------------+
+#        | Schema Data         | Hello there -Obi                    |
+#        +-----------------------------------------------------------+
+#
+#
+#        =============================================================
+#        |                Gamma - IOAM configuration                 |
+#        +===========================================================+
+#        | Node ID             | 3                                   |
+#        +-----------------------------------------------------------+
+#        | Node Wide ID        | 33333333                            |
+#        +-----------------------------------------------------------+
+#        | Ingress ID          | 301                                 |
+#        +-----------------------------------------------------------+
+#        | Ingress Wide ID     | 301301                              |
+#        +-----------------------------------------------------------+
+#        | Egress ID           | 0xffff (default value)              |
+#        +-----------------------------------------------------------+
+#        | Egress Wide ID      | 0xffffffff (default value)          |
+#        +-----------------------------------------------------------+
+#        | Namespace Data      | 0xdeadbee2                          |
+#        +-----------------------------------------------------------+
+#        | Namespace Wide Data | 0xcafec0caf22dc0de                  |
+#        +-----------------------------------------------------------+
+#        | Schema ID           | 0xffffff (= None)                   |
+#        +-----------------------------------------------------------+
+#        | Schema Data         |                                     |
+#        +-----------------------------------------------------------+
+
+source lib.sh
+
+################################################################################
+#                                                                              #
+# WARNING: Be careful if you modify the block below - it MUST be kept          #
+#          synchronized with configurations inside ioam6_parser.c and always   #
+#          reflect the same.                                                   #
+#                                                                              #
+################################################################################
+
+ALPHA=(
+	1					# ID
+	11111111				# Wide ID
+	0xffff					# Ingress ID
+	0xffffffff				# Ingress Wide ID
+	101					# Egress ID
+	101101					# Egress Wide ID
+	0xdeadbee0				# Namespace Data
+	0xcafec0caf00dc0de			# Namespace Wide Data
+	777					# Schema ID (0xffffff = None)
+	"something that will be 4n-aligned"	# Schema Data
+)
+
+BETA=(
+	2
+	22222222
+	201
+	201201
+	202
+	202202
+	0xdeadbee1
+	0xcafec0caf11dc0de
+	666
+	"Hello there -Obi"
+)
+
+GAMMA=(
+	3
+	33333333
+	301
+	301301
+	0xffff
+	0xffffffff
+	0xdeadbee2
+	0xcafec0caf22dc0de
+	0xffffff
+	""
+)
+
+TESTS_OUTPUT="
+	out_undef_ns
+	out_no_room
+	out_bits
+	out_full_supp_trace
+"
+
+TESTS_INPUT="
+	in_undef_ns
+	in_no_room
+	in_oflag
+	in_bits
+	in_full_supp_trace
+"
+
+TESTS_GLOBAL="
+	fwd_full_supp_trace
+"
+
+
+################################################################################
+#                                                                              #
+#                                   LIBRARY                                    #
+#                                                                              #
+################################################################################
+
+check_kernel_compatibility()
+{
+  setup_ns ioam_tmp_node
+  ip link add name veth0 netns $ioam_tmp_node type veth \
+         peer name veth1 netns $ioam_tmp_node
+
+  ip -netns $ioam_tmp_node link set veth0 up
+  ip -netns $ioam_tmp_node link set veth1 up
+
+  ip -netns $ioam_tmp_node ioam namespace add 0
+  ns_ad=$?
+
+  ip -netns $ioam_tmp_node ioam namespace show | grep -q "namespace 0"
+  ns_sh=$?
+
+  if [[ $ns_ad != 0 || $ns_sh != 0 ]]
+  then
+    echo "SKIP: kernel version probably too old, missing ioam support"
+    ip link del veth0 2>/dev/null || true
+    cleanup_ns $ioam_tmp_node || true
+    exit $ksft_skip
+  fi
+
+  ip -netns $ioam_tmp_node route add db02::/64 encap ioam6 mode inline \
+         trace prealloc type 0x800000 ns 0 size 4 dev veth0
+  tr_ad=$?
+
+  ip -netns $ioam_tmp_node -6 route | grep -q "encap ioam6"
+  tr_sh=$?
+
+  if [[ $tr_ad != 0 || $tr_sh != 0 ]]
+  then
+    echo "SKIP: cannot attach an ioam trace to a route, did you compile" \
+         "without CONFIG_IPV6_IOAM6_LWTUNNEL?"
+    ip link del veth0 2>/dev/null || true
+    cleanup_ns $ioam_tmp_node || true
+    exit $ksft_skip
+  fi
+
+  ip link del veth0 2>/dev/null || true
+  cleanup_ns $ioam_tmp_node || true
+
+  lsmod | grep -q "ip6_tunnel"
+  ip6tnl_loaded=$?
+
+  if [ $ip6tnl_loaded = 0 ]
+  then
+    encap_tests=0
+  else
+    modprobe ip6_tunnel &>/dev/null
+    lsmod | grep -q "ip6_tunnel"
+    encap_tests=$?
+
+    if [ $encap_tests != 0 ]
+    then
+      ip a | grep -q "ip6tnl0"
+      encap_tests=$?
+
+      if [ $encap_tests != 0 ]
+      then
+        echo "Note: ip6_tunnel not found neither as a module nor inside the" \
+             "kernel, tests that require it (encap mode) will be omitted"
+      fi
+    fi
+  fi
+}
+
+cleanup()
+{
+  ip link del ioam-veth-alpha 2>/dev/null || true
+  ip link del ioam-veth-gamma 2>/dev/null || true
+
+  cleanup_ns $ioam_node_alpha $ioam_node_beta $ioam_node_gamma || true
+
+  if [ $ip6tnl_loaded != 0 ]
+  then
+    modprobe -r ip6_tunnel 2>/dev/null || true
+  fi
+}
+
+setup()
+{
+  setup_ns ioam_node_alpha ioam_node_beta ioam_node_gamma
+
+  ip link add name ioam-veth-alpha netns $ioam_node_alpha type veth \
+         peer name ioam-veth-betaL netns $ioam_node_beta
+  ip link add name ioam-veth-betaR netns $ioam_node_beta type veth \
+         peer name ioam-veth-gamma netns $ioam_node_gamma
+
+  ip -netns $ioam_node_alpha link set ioam-veth-alpha name veth0
+  ip -netns $ioam_node_beta link set ioam-veth-betaL name veth0
+  ip -netns $ioam_node_beta link set ioam-veth-betaR name veth1
+  ip -netns $ioam_node_gamma link set ioam-veth-gamma name veth0
+
+  ip -netns $ioam_node_alpha addr add db01::2/64 dev veth0
+  ip -netns $ioam_node_alpha link set veth0 up
+  ip -netns $ioam_node_alpha link set lo up
+  ip -netns $ioam_node_alpha route add db02::/64 via db01::1 dev veth0
+  ip -netns $ioam_node_alpha route del db01::/64
+  ip -netns $ioam_node_alpha route add db01::/64 dev veth0
+
+  ip -netns $ioam_node_beta addr add db01::1/64 dev veth0
+  ip -netns $ioam_node_beta addr add db02::1/64 dev veth1
+  ip -netns $ioam_node_beta link set veth0 up
+  ip -netns $ioam_node_beta link set veth1 up
+  ip -netns $ioam_node_beta link set lo up
+
+  ip -netns $ioam_node_gamma addr add db02::2/64 dev veth0
+  ip -netns $ioam_node_gamma link set veth0 up
+  ip -netns $ioam_node_gamma link set lo up
+  ip -netns $ioam_node_gamma route add db01::/64 via db02::1 dev veth0
+
+  # - IOAM config -
+  ip netns exec $ioam_node_alpha sysctl -wq net.ipv6.ioam6_id=${ALPHA[0]}
+  ip netns exec $ioam_node_alpha sysctl -wq net.ipv6.ioam6_id_wide=${ALPHA[1]}
+  ip netns exec $ioam_node_alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id=${ALPHA[4]}
+  ip netns exec $ioam_node_alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${ALPHA[5]}
+  ip -netns $ioam_node_alpha ioam namespace add 123 data ${ALPHA[6]} wide ${ALPHA[7]}
+  ip -netns $ioam_node_alpha ioam schema add ${ALPHA[8]} "${ALPHA[9]}"
+  ip -netns $ioam_node_alpha ioam namespace set 123 schema ${ALPHA[8]}
+
+  ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.all.forwarding=1
+  ip netns exec $ioam_node_beta sysctl -wq net.ipv6.ioam6_id=${BETA[0]}
+  ip netns exec $ioam_node_beta sysctl -wq net.ipv6.ioam6_id_wide=${BETA[1]}
+  ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1
+  ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_id=${BETA[2]}
+  ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${BETA[3]}
+  ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth1.ioam6_id=${BETA[4]}
+  ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth1.ioam6_id_wide=${BETA[5]}
+  ip -netns $ioam_node_beta ioam namespace add 123 data ${BETA[6]} wide ${BETA[7]}
+  ip -netns $ioam_node_beta ioam schema add ${BETA[8]} "${BETA[9]}"
+  ip -netns $ioam_node_beta ioam namespace set 123 schema ${BETA[8]}
+
+  ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.ioam6_id=${GAMMA[0]}
+  ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.ioam6_id_wide=${GAMMA[1]}
+  ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1
+  ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id=${GAMMA[2]}
+  ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${GAMMA[3]}
+  ip -netns $ioam_node_gamma ioam namespace add 123 data ${GAMMA[6]} wide ${GAMMA[7]}
+
+  sleep 1
+
+  ip netns exec $ioam_node_alpha ping6 -c 5 -W 1 db02::2 &>/dev/null
+  if [ $? != 0 ]
+  then
+    echo "Setup FAILED"
+    cleanup &>/dev/null
+    exit 0
+  fi
+}
+
+log_test_passed()
+{
+  local desc=$1
+  printf "TEST: %-60s  [ OK ]\n" "${desc}"
+}
+
+log_test_failed()
+{
+  local desc=$1
+  printf "TEST: %-60s  [FAIL]\n" "${desc}"
+}
+
+log_results()
+{
+  echo "- Tests passed: ${npassed}"
+  echo "- Tests failed: ${nfailed}"
+}
+
+run_test()
+{
+  local name=$1
+  local desc=$2
+  local node_src=$3
+  local node_dst=$4
+  local ip6_dst=$5
+  local trace_type=$6
+  local ioam_ns=$7
+  local type=$8
+
+  ip netns exec $node_dst ./ioam6_parser $name $trace_type $ioam_ns $type &
+  local spid=$!
+  sleep 0.1
+
+  ip netns exec $node_src ping6 -t 64 -c 1 -W 1 $ip6_dst &>/dev/null
+  if [ $? != 0 ]
+  then
+    nfailed=$((nfailed+1))
+    log_test_failed "${desc}"
+    kill -2 $spid &>/dev/null
+  else
+    wait $spid
+    if [ $? = 0 ]
+    then
+      npassed=$((npassed+1))
+      log_test_passed "${desc}"
+    else
+      nfailed=$((nfailed+1))
+      log_test_failed "${desc}"
+    fi
+  fi
+}
+
+run()
+{
+  echo
+  printf "%0.s-" {1..74}
+  echo
+  echo "OUTPUT tests"
+  printf "%0.s-" {1..74}
+  echo
+
+  # set OUTPUT settings
+  ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=0
+
+  for t in $TESTS_OUTPUT
+  do
+    $t "inline"
+    [ $encap_tests = 0 ] && $t "encap"
+  done
+
+  # clean OUTPUT settings
+  ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1
+  ip -netns $ioam_node_alpha route change db01::/64 dev veth0
+
+
+  echo
+  printf "%0.s-" {1..74}
+  echo
+  echo "INPUT tests"
+  printf "%0.s-" {1..74}
+  echo
+
+  # set INPUT settings
+  ip -netns $ioam_node_alpha ioam namespace del 123
+
+  for t in $TESTS_INPUT
+  do
+    $t "inline"
+    [ $encap_tests = 0 ] && $t "encap"
+  done
+
+  # clean INPUT settings
+  ip -netns $ioam_node_alpha ioam namespace add 123 \
+         data ${ALPHA[6]} wide ${ALPHA[7]}
+  ip -netns $ioam_node_alpha ioam namespace set 123 schema ${ALPHA[8]}
+  ip -netns $ioam_node_alpha route change db01::/64 dev veth0
+
+  echo
+  printf "%0.s-" {1..74}
+  echo
+  echo "GLOBAL tests"
+  printf "%0.s-" {1..74}
+  echo
+
+  for t in $TESTS_GLOBAL
+  do
+    $t "inline"
+    [ $encap_tests = 0 ] && $t "encap"
+  done
+
+  echo
+  log_results
+}
+
+bit2type=(
+  0x800000 0x400000 0x200000 0x100000 0x080000 0x040000 0x020000 0x010000
+  0x008000 0x004000 0x002000 0x001000 0x000800 0x000400 0x000200 0x000100
+  0x000080 0x000040 0x000020 0x000010 0x000008 0x000004 0x000002
+)
+bit2size=( 4 4 4 4 4 4 4 4 8 8 8 4 4 4 4 4 4 4 4 4 4 4 4 )
+
+
+################################################################################
+#                                                                              #
+#                              OUTPUT tests                                    #
+#                                                                              #
+#   Two nodes (sender/receiver), IOAM disabled on ingress for the receiver.    #
+################################################################################
+
+out_undef_ns()
+{
+  ##############################################################################
+  # Make sure that the encap node won't fill the trace if the chosen IOAM      #
+  # namespace is not configured locally.                                       #
+  ##############################################################################
+  local desc="Unknown IOAM namespace"
+
+  [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1"
+  [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up
+
+  ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \
+         trace prealloc type 0x800000 ns 0 size 4 dev veth0
+
+  run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \
+         db01::1 0x800000 0 $1
+
+  [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
+}
+
+out_no_room()
+{
+  ##############################################################################
+  # Make sure that the encap node won't fill the trace and will set the        #
+  # Overflow flag since there is no room enough for its data.                  #
+  ##############################################################################
+  local desc="Missing trace room"
+
+  [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1"
+  [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up
+
+  ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \
+         trace prealloc type 0xc00000 ns 123 size 4 dev veth0
+
+  run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \
+         db01::1 0xc00000 123 $1
+
+  [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
+}
+
+out_bits()
+{
+  ##############################################################################
+  # Make sure that, for each trace type bit, the encap node will either:       #
+  #  (i)  fill the trace with its data when it is a supported bit              #
+  #  (ii) not fill the trace with its data when it is an unsupported bit       #
+  ##############################################################################
+  local desc="Trace type with bit <n> only"
+
+  local tmp=${bit2size[22]}
+  bit2size[22]=$(( $tmp + ${#ALPHA[9]} + ((4 - (${#ALPHA[9]} % 4)) % 4) ))
+
+  [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1"
+  [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up
+
+  for i in {0..22}
+  do
+    ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \
+           trace prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} \
+           dev veth0 &>/dev/null
+
+    local cmd_res=$?
+    local descr="${desc/<n>/$i}"
+
+    if [[ $i -ge 12 && $i -le 21 ]]
+    then
+      if [ $cmd_res != 0 ]
+      then
+        npassed=$((npassed+1))
+        log_test_passed "$descr ($1 mode)"
+      else
+        nfailed=$((nfailed+1))
+        log_test_failed "$descr ($1 mode)"
+      fi
+    else
+	run_test "out_bit$i" "$descr ($1 mode)" $ioam_node_alpha \
+           $ioam_node_beta db01::1 ${bit2type[$i]} 123 $1
+    fi
+  done
+
+  [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
+
+  bit2size[22]=$tmp
+}
+
+out_full_supp_trace()
+{
+  ##############################################################################
+  # Make sure that the encap node will correctly fill a full trace. Be careful,#
+  # "full trace" here does NOT mean all bits (only supported ones).            #
+  ##############################################################################
+  local desc="Full supported trace"
+
+  [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1"
+  [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up
+
+  ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \
+         trace prealloc type 0xfff002 ns 123 size 100 dev veth0
+
+  run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \
+         db01::1 0xfff002 123 $1
+
+  [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
+}
+
+
+################################################################################
+#                                                                              #
+#                               INPUT tests                                    #
+#                                                                              #
+#     Two nodes (sender/receiver), the sender MUST NOT fill the trace upon     #
+#     insertion -> the IOAM namespace configured on the sender is removed      #
+#     and is used in the inserted trace to force the sender not to fill it.    #
+################################################################################
+
+in_undef_ns()
+{
+  ##############################################################################
+  # Make sure that the receiving node won't fill the trace if the related IOAM #
+  # namespace is not configured locally.                                       #
+  ##############################################################################
+  local desc="Unknown IOAM namespace"
+
+  [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1"
+  [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up
+
+  ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \
+         trace prealloc type 0x800000 ns 0 size 4 dev veth0
+
+  run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \
+         db01::1 0x800000 0 $1
+
+  [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
+}
+
+in_no_room()
+{
+  ##############################################################################
+  # Make sure that the receiving node won't fill the trace and will set the    #
+  # Overflow flag if there is no room enough for its data.                     #
+  ##############################################################################
+  local desc="Missing trace room"
+
+  [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1"
+  [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up
+
+  ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \
+         trace prealloc type 0xc00000 ns 123 size 4 dev veth0
+
+  run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \
+         db01::1 0xc00000 123 $1
+
+  [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
+}
+
+in_bits()
+{
+  ##############################################################################
+  # Make sure that, for each trace type bit, the receiving node will either:   #
+  #  (i)  fill the trace with its data when it is a supported bit              #
+  #  (ii) not fill the trace with its data when it is an unsupported bit       #
+  ##############################################################################
+  local desc="Trace type with bit <n> only"
+
+  local tmp=${bit2size[22]}
+  bit2size[22]=$(( $tmp + ${#BETA[9]} + ((4 - (${#BETA[9]} % 4)) % 4) ))
+
+  [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1"
+  [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up
+
+  for i in {0..11} {22..22}
+  do
+    ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \
+           trace prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} \
+           dev veth0
+
+    run_test "in_bit$i" "${desc/<n>/$i} ($1 mode)" $ioam_node_alpha \
+           $ioam_node_beta db01::1 ${bit2type[$i]} 123 $1
+  done
+
+  [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
+
+  bit2size[22]=$tmp
+}
+
+in_oflag()
+{
+  ##############################################################################
+  # Make sure that the receiving node won't fill the trace since the Overflow  #
+  # flag is set.                                                               #
+  ##############################################################################
+  local desc="Overflow flag is set"
+
+  # Exception:
+  #   Here, we need the sender to set the Overflow flag. For that, we will add
+  #   back the IOAM namespace that was previously configured on the sender.
+  ip -netns $ioam_node_alpha ioam namespace add 123
+
+  [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1"
+  [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up
+
+  ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \
+         trace prealloc type 0xc00000 ns 123 size 4 dev veth0
+
+  run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \
+         db01::1 0xc00000 123 $1
+
+  [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
+
+  # And we clean the exception for this test to get things back to normal for
+  # other INPUT tests
+  ip -netns $ioam_node_alpha ioam namespace del 123
+}
+
+in_full_supp_trace()
+{
+  ##############################################################################
+  # Make sure that the receiving node will correctly fill a full trace. Be     #
+  # careful, "full trace" here does NOT mean all bits (only supported ones).   #
+  ##############################################################################
+  local desc="Full supported trace"
+
+  [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1"
+  [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up
+
+  ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \
+         trace prealloc type 0xfff002 ns 123 size 80 dev veth0
+
+  run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \
+         db01::1 0xfff002 123 $1
+
+  [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
+}
+
+
+################################################################################
+#                                                                              #
+#                              GLOBAL tests                                    #
+#                                                                              #
+#   Three nodes (sender/router/receiver), IOAM fully enabled on every node.    #
+################################################################################
+
+fwd_full_supp_trace()
+{
+  ##############################################################################
+  # Make sure that all three nodes correctly filled the full supported trace   #
+  # by checking that the trace data is consistent with the predefined config.  #
+  ##############################################################################
+  local desc="Forward - Full supported trace"
+
+  [ "$1" = "encap" ] && mode="$1 tundst db02::2" || mode="$1"
+  [ "$1" = "encap" ] && ip -netns $ioam_node_gamma link set ip6tnl0 up
+
+  ip -netns $ioam_node_alpha route change db02::/64 encap ioam6 mode $mode \
+         trace prealloc type 0xfff002 ns 123 size 244 via db01::1 dev veth0
+
+  run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_gamma \
+         db02::2 0xfff002 123 $1
+
+  [ "$1" = "encap" ] && ip -netns $ioam_node_gamma link set ip6tnl0 down
+}
+
+
+################################################################################
+#                                                                              #
+#                                     MAIN                                     #
+#                                                                              #
+################################################################################
+
+npassed=0
+nfailed=0
+
+if [ "$(id -u)" -ne 0 ]
+then
+  echo "SKIP: Need root privileges"
+  exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v ip)" ]
+then
+  echo "SKIP: Could not run test without ip tool"
+  exit $ksft_skip
+fi
+
+ip ioam &>/dev/null
+if [ $? = 1 ]
+then
+  echo "SKIP: iproute2 too old, missing ioam command"
+  exit $ksft_skip
+fi
+
+check_kernel_compatibility
+
+cleanup &>/dev/null
+setup
+run
+cleanup &>/dev/null
diff --git a/tools/testing/selftests/net/ioam6_parser.c b/tools/testing/selftests/net/ioam6_parser.c
new file mode 100644
index 000000000000..895e5bb5044b
--- /dev/null
+++ b/tools/testing/selftests/net/ioam6_parser.c
@@ -0,0 +1,674 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Author: Justin Iurman (justin.iurman@uliege.be)
+ *
+ * IOAM tester for IPv6, see ioam6.sh for details on each test case.
+ */
+#include <arpa/inet.h>
+#include <errno.h>
+#include <limits.h>
+#include <linux/const.h>
+#include <linux/ioam6.h>
+#include <linux/ipv6.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+struct ioam_config {
+	__u32 id;
+	__u64 wide;
+	__u16 ingr_id;
+	__u16 egr_id;
+	__u32 ingr_wide;
+	__u32 egr_wide;
+	__u32 ns_data;
+	__u64 ns_wide;
+	__u32 sc_id;
+	__u8 hlim;
+	char *sc_data;
+};
+
+/*
+ * Be careful if you modify structs below - everything MUST be kept synchronized
+ * with configurations inside ioam6.sh and always reflect the same.
+ */
+
+static struct ioam_config node1 = {
+	.id = 1,
+	.wide = 11111111,
+	.ingr_id = 0xffff, /* default value */
+	.egr_id = 101,
+	.ingr_wide = 0xffffffff, /* default value */
+	.egr_wide = 101101,
+	.ns_data = 0xdeadbee0,
+	.ns_wide = 0xcafec0caf00dc0de,
+	.sc_id = 777,
+	.sc_data = "something that will be 4n-aligned",
+	.hlim = 64,
+};
+
+static struct ioam_config node2 = {
+	.id = 2,
+	.wide = 22222222,
+	.ingr_id = 201,
+	.egr_id = 202,
+	.ingr_wide = 201201,
+	.egr_wide = 202202,
+	.ns_data = 0xdeadbee1,
+	.ns_wide = 0xcafec0caf11dc0de,
+	.sc_id = 666,
+	.sc_data = "Hello there -Obi",
+	.hlim = 63,
+};
+
+static struct ioam_config node3 = {
+	.id = 3,
+	.wide = 33333333,
+	.ingr_id = 301,
+	.egr_id = 0xffff, /* default value */
+	.ingr_wide = 301301,
+	.egr_wide = 0xffffffff, /* default value */
+	.ns_data = 0xdeadbee2,
+	.ns_wide = 0xcafec0caf22dc0de,
+	.sc_id = 0xffffff, /* default value */
+	.sc_data = NULL,
+	.hlim = 62,
+};
+
+enum {
+	/**********
+	 * OUTPUT *
+	 **********/
+	TEST_OUT_UNDEF_NS,
+	TEST_OUT_NO_ROOM,
+	TEST_OUT_BIT0,
+	TEST_OUT_BIT1,
+	TEST_OUT_BIT2,
+	TEST_OUT_BIT3,
+	TEST_OUT_BIT4,
+	TEST_OUT_BIT5,
+	TEST_OUT_BIT6,
+	TEST_OUT_BIT7,
+	TEST_OUT_BIT8,
+	TEST_OUT_BIT9,
+	TEST_OUT_BIT10,
+	TEST_OUT_BIT11,
+	TEST_OUT_BIT22,
+	TEST_OUT_FULL_SUPP_TRACE,
+
+	/*********
+	 * INPUT *
+	 *********/
+	TEST_IN_UNDEF_NS,
+	TEST_IN_NO_ROOM,
+	TEST_IN_OFLAG,
+	TEST_IN_BIT0,
+	TEST_IN_BIT1,
+	TEST_IN_BIT2,
+	TEST_IN_BIT3,
+	TEST_IN_BIT4,
+	TEST_IN_BIT5,
+	TEST_IN_BIT6,
+	TEST_IN_BIT7,
+	TEST_IN_BIT8,
+	TEST_IN_BIT9,
+	TEST_IN_BIT10,
+	TEST_IN_BIT11,
+	TEST_IN_BIT22,
+	TEST_IN_FULL_SUPP_TRACE,
+
+	/**********
+	 * GLOBAL *
+	 **********/
+	TEST_FWD_FULL_SUPP_TRACE,
+
+	__TEST_MAX,
+};
+
+static int check_ioam_header(int tid, struct ioam6_trace_hdr *ioam6h,
+			     __u32 trace_type, __u16 ioam_ns)
+{
+	if (__be16_to_cpu(ioam6h->namespace_id) != ioam_ns ||
+	    __be32_to_cpu(ioam6h->type_be32) != (trace_type << 8))
+		return 1;
+
+	switch (tid) {
+	case TEST_OUT_UNDEF_NS:
+	case TEST_IN_UNDEF_NS:
+		return ioam6h->overflow ||
+		       ioam6h->nodelen != 1 ||
+		       ioam6h->remlen != 1;
+
+	case TEST_OUT_NO_ROOM:
+	case TEST_IN_NO_ROOM:
+	case TEST_IN_OFLAG:
+		return !ioam6h->overflow ||
+		       ioam6h->nodelen != 2 ||
+		       ioam6h->remlen != 1;
+
+	case TEST_OUT_BIT0:
+	case TEST_IN_BIT0:
+	case TEST_OUT_BIT1:
+	case TEST_IN_BIT1:
+	case TEST_OUT_BIT2:
+	case TEST_IN_BIT2:
+	case TEST_OUT_BIT3:
+	case TEST_IN_BIT3:
+	case TEST_OUT_BIT4:
+	case TEST_IN_BIT4:
+	case TEST_OUT_BIT5:
+	case TEST_IN_BIT5:
+	case TEST_OUT_BIT6:
+	case TEST_IN_BIT6:
+	case TEST_OUT_BIT7:
+	case TEST_IN_BIT7:
+	case TEST_OUT_BIT11:
+	case TEST_IN_BIT11:
+		return ioam6h->overflow ||
+		       ioam6h->nodelen != 1 ||
+		       ioam6h->remlen;
+
+	case TEST_OUT_BIT8:
+	case TEST_IN_BIT8:
+	case TEST_OUT_BIT9:
+	case TEST_IN_BIT9:
+	case TEST_OUT_BIT10:
+	case TEST_IN_BIT10:
+		return ioam6h->overflow ||
+		       ioam6h->nodelen != 2 ||
+		       ioam6h->remlen;
+
+	case TEST_OUT_BIT22:
+	case TEST_IN_BIT22:
+		return ioam6h->overflow ||
+		       ioam6h->nodelen ||
+		       ioam6h->remlen;
+
+	case TEST_OUT_FULL_SUPP_TRACE:
+	case TEST_IN_FULL_SUPP_TRACE:
+	case TEST_FWD_FULL_SUPP_TRACE:
+		return ioam6h->overflow ||
+		       ioam6h->nodelen != 15 ||
+		       ioam6h->remlen;
+
+	default:
+		break;
+	}
+
+	return 1;
+}
+
+static int check_ioam6_data(__u8 **p, struct ioam6_trace_hdr *ioam6h,
+			    const struct ioam_config cnf)
+{
+	unsigned int len;
+	__u8 aligned;
+	__u64 raw64;
+	__u32 raw32;
+
+	if (ioam6h->type.bit0) {
+		raw32 = __be32_to_cpu(*((__u32 *)*p));
+		if (cnf.hlim != (raw32 >> 24) || cnf.id != (raw32 & 0xffffff))
+			return 1;
+		*p += sizeof(__u32);
+	}
+
+	if (ioam6h->type.bit1) {
+		raw32 = __be32_to_cpu(*((__u32 *)*p));
+		if (cnf.ingr_id != (raw32 >> 16) ||
+		    cnf.egr_id != (raw32 & 0xffff))
+			return 1;
+		*p += sizeof(__u32);
+	}
+
+	if (ioam6h->type.bit2)
+		*p += sizeof(__u32);
+
+	if (ioam6h->type.bit3)
+		*p += sizeof(__u32);
+
+	if (ioam6h->type.bit4) {
+		if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+			return 1;
+		*p += sizeof(__u32);
+	}
+
+	if (ioam6h->type.bit5) {
+		if (__be32_to_cpu(*((__u32 *)*p)) != cnf.ns_data)
+			return 1;
+		*p += sizeof(__u32);
+	}
+
+	if (ioam6h->type.bit6)
+		*p += sizeof(__u32);
+
+	if (ioam6h->type.bit7) {
+		if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+			return 1;
+		*p += sizeof(__u32);
+	}
+
+	if (ioam6h->type.bit8) {
+		raw64 = __be64_to_cpu(*((__u64 *)*p));
+		if (cnf.hlim != (raw64 >> 56) ||
+		    cnf.wide != (raw64 & 0xffffffffffffff))
+			return 1;
+		*p += sizeof(__u64);
+	}
+
+	if (ioam6h->type.bit9) {
+		if (__be32_to_cpu(*((__u32 *)*p)) != cnf.ingr_wide)
+			return 1;
+		*p += sizeof(__u32);
+
+		if (__be32_to_cpu(*((__u32 *)*p)) != cnf.egr_wide)
+			return 1;
+		*p += sizeof(__u32);
+	}
+
+	if (ioam6h->type.bit10) {
+		if (__be64_to_cpu(*((__u64 *)*p)) != cnf.ns_wide)
+			return 1;
+		*p += sizeof(__u64);
+	}
+
+	if (ioam6h->type.bit11) {
+		if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+			return 1;
+		*p += sizeof(__u32);
+	}
+
+	if (ioam6h->type.bit12) {
+		if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+			return 1;
+		*p += sizeof(__u32);
+	}
+
+	if (ioam6h->type.bit13) {
+		if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+			return 1;
+		*p += sizeof(__u32);
+	}
+
+	if (ioam6h->type.bit14) {
+		if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+			return 1;
+		*p += sizeof(__u32);
+	}
+
+	if (ioam6h->type.bit15) {
+		if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+			return 1;
+		*p += sizeof(__u32);
+	}
+
+	if (ioam6h->type.bit16) {
+		if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+			return 1;
+		*p += sizeof(__u32);
+	}
+
+	if (ioam6h->type.bit17) {
+		if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+			return 1;
+		*p += sizeof(__u32);
+	}
+
+	if (ioam6h->type.bit18) {
+		if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+			return 1;
+		*p += sizeof(__u32);
+	}
+
+	if (ioam6h->type.bit19) {
+		if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+			return 1;
+		*p += sizeof(__u32);
+	}
+
+	if (ioam6h->type.bit20) {
+		if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+			return 1;
+		*p += sizeof(__u32);
+	}
+
+	if (ioam6h->type.bit21) {
+		if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+			return 1;
+		*p += sizeof(__u32);
+	}
+
+	if (ioam6h->type.bit22) {
+		len = cnf.sc_data ? strlen(cnf.sc_data) : 0;
+		aligned = cnf.sc_data ? __ALIGN_KERNEL(len, 4) : 0;
+
+		raw32 = __be32_to_cpu(*((__u32 *)*p));
+		if (aligned != (raw32 >> 24) * 4 ||
+		    cnf.sc_id != (raw32 & 0xffffff))
+			return 1;
+		*p += sizeof(__u32);
+
+		if (cnf.sc_data) {
+			if (strncmp((char *)*p, cnf.sc_data, len))
+				return 1;
+
+			*p += len;
+			aligned -= len;
+
+			while (aligned--) {
+				if (**p != '\0')
+					return 1;
+				*p += sizeof(__u8);
+			}
+		}
+	}
+
+	return 0;
+}
+
+static int check_ioam_header_and_data(int tid, struct ioam6_trace_hdr *ioam6h,
+				      __u32 trace_type, __u16 ioam_ns)
+{
+	__u8 *p;
+
+	if (check_ioam_header(tid, ioam6h, trace_type, ioam_ns))
+		return 1;
+
+	p = ioam6h->data + ioam6h->remlen * 4;
+
+	switch (tid) {
+	case TEST_OUT_BIT0:
+	case TEST_OUT_BIT1:
+	case TEST_OUT_BIT2:
+	case TEST_OUT_BIT3:
+	case TEST_OUT_BIT4:
+	case TEST_OUT_BIT5:
+	case TEST_OUT_BIT6:
+	case TEST_OUT_BIT7:
+	case TEST_OUT_BIT8:
+	case TEST_OUT_BIT9:
+	case TEST_OUT_BIT10:
+	case TEST_OUT_BIT11:
+	case TEST_OUT_BIT22:
+	case TEST_OUT_FULL_SUPP_TRACE:
+		return check_ioam6_data(&p, ioam6h, node1);
+
+	case TEST_IN_BIT0:
+	case TEST_IN_BIT1:
+	case TEST_IN_BIT2:
+	case TEST_IN_BIT3:
+	case TEST_IN_BIT4:
+	case TEST_IN_BIT5:
+	case TEST_IN_BIT6:
+	case TEST_IN_BIT7:
+	case TEST_IN_BIT8:
+	case TEST_IN_BIT9:
+	case TEST_IN_BIT10:
+	case TEST_IN_BIT11:
+	case TEST_IN_BIT22:
+	case TEST_IN_FULL_SUPP_TRACE:
+	{
+		__u32 tmp32 = node2.egr_wide;
+		__u16 tmp16 = node2.egr_id;
+		int res;
+
+		node2.egr_id = 0xffff;
+		node2.egr_wide = 0xffffffff;
+
+		res = check_ioam6_data(&p, ioam6h, node2);
+
+		node2.egr_id = tmp16;
+		node2.egr_wide = tmp32;
+
+		return res;
+	}
+
+	case TEST_FWD_FULL_SUPP_TRACE:
+		if (check_ioam6_data(&p, ioam6h, node3))
+			return 1;
+		if (check_ioam6_data(&p, ioam6h, node2))
+			return 1;
+		return check_ioam6_data(&p, ioam6h, node1);
+
+	default:
+		break;
+	}
+
+	return 1;
+}
+
+static int str2id(const char *tname)
+{
+	if (!strcmp("out_undef_ns", tname))
+		return TEST_OUT_UNDEF_NS;
+	if (!strcmp("out_no_room", tname))
+		return TEST_OUT_NO_ROOM;
+	if (!strcmp("out_bit0", tname))
+		return TEST_OUT_BIT0;
+	if (!strcmp("out_bit1", tname))
+		return TEST_OUT_BIT1;
+	if (!strcmp("out_bit2", tname))
+		return TEST_OUT_BIT2;
+	if (!strcmp("out_bit3", tname))
+		return TEST_OUT_BIT3;
+	if (!strcmp("out_bit4", tname))
+		return TEST_OUT_BIT4;
+	if (!strcmp("out_bit5", tname))
+		return TEST_OUT_BIT5;
+	if (!strcmp("out_bit6", tname))
+		return TEST_OUT_BIT6;
+	if (!strcmp("out_bit7", tname))
+		return TEST_OUT_BIT7;
+	if (!strcmp("out_bit8", tname))
+		return TEST_OUT_BIT8;
+	if (!strcmp("out_bit9", tname))
+		return TEST_OUT_BIT9;
+	if (!strcmp("out_bit10", tname))
+		return TEST_OUT_BIT10;
+	if (!strcmp("out_bit11", tname))
+		return TEST_OUT_BIT11;
+	if (!strcmp("out_bit22", tname))
+		return TEST_OUT_BIT22;
+	if (!strcmp("out_full_supp_trace", tname))
+		return TEST_OUT_FULL_SUPP_TRACE;
+	if (!strcmp("in_undef_ns", tname))
+		return TEST_IN_UNDEF_NS;
+	if (!strcmp("in_no_room", tname))
+		return TEST_IN_NO_ROOM;
+	if (!strcmp("in_oflag", tname))
+		return TEST_IN_OFLAG;
+	if (!strcmp("in_bit0", tname))
+		return TEST_IN_BIT0;
+	if (!strcmp("in_bit1", tname))
+		return TEST_IN_BIT1;
+	if (!strcmp("in_bit2", tname))
+		return TEST_IN_BIT2;
+	if (!strcmp("in_bit3", tname))
+		return TEST_IN_BIT3;
+	if (!strcmp("in_bit4", tname))
+		return TEST_IN_BIT4;
+	if (!strcmp("in_bit5", tname))
+		return TEST_IN_BIT5;
+	if (!strcmp("in_bit6", tname))
+		return TEST_IN_BIT6;
+	if (!strcmp("in_bit7", tname))
+		return TEST_IN_BIT7;
+	if (!strcmp("in_bit8", tname))
+		return TEST_IN_BIT8;
+	if (!strcmp("in_bit9", tname))
+		return TEST_IN_BIT9;
+	if (!strcmp("in_bit10", tname))
+		return TEST_IN_BIT10;
+	if (!strcmp("in_bit11", tname))
+		return TEST_IN_BIT11;
+	if (!strcmp("in_bit22", tname))
+		return TEST_IN_BIT22;
+	if (!strcmp("in_full_supp_trace", tname))
+		return TEST_IN_FULL_SUPP_TRACE;
+	if (!strcmp("fwd_full_supp_trace", tname))
+		return TEST_FWD_FULL_SUPP_TRACE;
+
+	return -1;
+}
+
+static int get_u32(__u32 *val, const char *arg, int base)
+{
+	unsigned long res;
+	char *ptr;
+
+	if (!arg || !*arg)
+		return -1;
+	res = strtoul(arg, &ptr, base);
+
+	if (!ptr || ptr == arg || *ptr)
+		return -1;
+
+	if (res == ULONG_MAX && errno == ERANGE)
+		return -1;
+
+	if (res > 0xFFFFFFFFUL)
+		return -1;
+
+	*val = res;
+	return 0;
+}
+
+static int get_u16(__u16 *val, const char *arg, int base)
+{
+	unsigned long res;
+	char *ptr;
+
+	if (!arg || !*arg)
+		return -1;
+	res = strtoul(arg, &ptr, base);
+
+	if (!ptr || ptr == arg || *ptr)
+		return -1;
+
+	if (res == ULONG_MAX && errno == ERANGE)
+		return -1;
+
+	if (res > 0xFFFFUL)
+		return -1;
+
+	*val = res;
+	return 0;
+}
+
+static int (*func[__TEST_MAX])(int, struct ioam6_trace_hdr *, __u32, __u16) = {
+	[TEST_OUT_UNDEF_NS]		= check_ioam_header,
+	[TEST_OUT_NO_ROOM]		= check_ioam_header,
+	[TEST_OUT_BIT0]		= check_ioam_header_and_data,
+	[TEST_OUT_BIT1]		= check_ioam_header_and_data,
+	[TEST_OUT_BIT2]		= check_ioam_header_and_data,
+	[TEST_OUT_BIT3]		= check_ioam_header_and_data,
+	[TEST_OUT_BIT4]		= check_ioam_header_and_data,
+	[TEST_OUT_BIT5]		= check_ioam_header_and_data,
+	[TEST_OUT_BIT6]		= check_ioam_header_and_data,
+	[TEST_OUT_BIT7]		= check_ioam_header_and_data,
+	[TEST_OUT_BIT8]		= check_ioam_header_and_data,
+	[TEST_OUT_BIT9]		= check_ioam_header_and_data,
+	[TEST_OUT_BIT10]		= check_ioam_header_and_data,
+	[TEST_OUT_BIT11]		= check_ioam_header_and_data,
+	[TEST_OUT_BIT22]		= check_ioam_header_and_data,
+	[TEST_OUT_FULL_SUPP_TRACE]	= check_ioam_header_and_data,
+	[TEST_IN_UNDEF_NS]		= check_ioam_header,
+	[TEST_IN_NO_ROOM]		= check_ioam_header,
+	[TEST_IN_OFLAG]		= check_ioam_header,
+	[TEST_IN_BIT0]			= check_ioam_header_and_data,
+	[TEST_IN_BIT1]			= check_ioam_header_and_data,
+	[TEST_IN_BIT2]			= check_ioam_header_and_data,
+	[TEST_IN_BIT3]			= check_ioam_header_and_data,
+	[TEST_IN_BIT4]			= check_ioam_header_and_data,
+	[TEST_IN_BIT5]			= check_ioam_header_and_data,
+	[TEST_IN_BIT6]			= check_ioam_header_and_data,
+	[TEST_IN_BIT7]			= check_ioam_header_and_data,
+	[TEST_IN_BIT8]			= check_ioam_header_and_data,
+	[TEST_IN_BIT9]			= check_ioam_header_and_data,
+	[TEST_IN_BIT10]		= check_ioam_header_and_data,
+	[TEST_IN_BIT11]		= check_ioam_header_and_data,
+	[TEST_IN_BIT22]		= check_ioam_header_and_data,
+	[TEST_IN_FULL_SUPP_TRACE]	= check_ioam_header_and_data,
+	[TEST_FWD_FULL_SUPP_TRACE]	= check_ioam_header_and_data,
+};
+
+int main(int argc, char **argv)
+{
+	int fd, size, hoplen, tid, ret = 1, on = 1;
+	struct ioam6_hdr *opt;
+	struct cmsghdr *cmsg;
+	struct msghdr msg;
+	struct iovec iov;
+	__u8 buffer[512];
+	__u32 tr_type;
+	__u16 ioam_ns;
+	__u8 *ptr;
+
+	if (argc != 5)
+		goto out;
+
+	tid = str2id(argv[1]);
+	if (tid < 0 || !func[tid])
+		goto out;
+
+	if (get_u32(&tr_type, argv[2], 16) ||
+	    get_u16(&ioam_ns, argv[3], 0))
+		goto out;
+
+	fd = socket(PF_INET6, SOCK_RAW,
+		    !strcmp(argv[4], "encap") ? IPPROTO_IPV6 : IPPROTO_ICMPV6);
+	if (fd < 0)
+		goto out;
+
+	setsockopt(fd, IPPROTO_IPV6, IPV6_RECVHOPOPTS,  &on, sizeof(on));
+
+	iov.iov_len = 1;
+	iov.iov_base = malloc(CMSG_SPACE(sizeof(buffer)));
+	if (!iov.iov_base)
+		goto close;
+recv:
+	memset(&msg, 0, sizeof(msg));
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+	msg.msg_control = buffer;
+	msg.msg_controllen = CMSG_SPACE(sizeof(buffer));
+
+	size = recvmsg(fd, &msg, 0);
+	if (size <= 0)
+		goto close;
+
+	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+		if (cmsg->cmsg_level != IPPROTO_IPV6 ||
+		    cmsg->cmsg_type != IPV6_HOPOPTS ||
+		    cmsg->cmsg_len < sizeof(struct ipv6_hopopt_hdr))
+			continue;
+
+		ptr = (__u8 *)CMSG_DATA(cmsg);
+
+		hoplen = (ptr[1] + 1) << 3;
+		ptr += sizeof(struct ipv6_hopopt_hdr);
+
+		while (hoplen > 0) {
+			opt = (struct ioam6_hdr *)ptr;
+
+			if (opt->opt_type == IPV6_TLV_IOAM &&
+			    opt->type == IOAM6_TYPE_PREALLOC) {
+				ptr += sizeof(*opt);
+				ret = func[tid](tid,
+						(struct ioam6_trace_hdr *)ptr,
+						tr_type, ioam_ns);
+				goto close;
+			}
+
+			ptr += opt->opt_len + 2;
+			hoplen -= opt->opt_len + 2;
+		}
+	}
+
+	goto recv;
+close:
+	free(iov.iov_base);
+	close(fd);
+out:
+	return ret;
+}
diff --git a/tools/testing/selftests/net/ip_local_port_range.c b/tools/testing/selftests/net/ip_local_port_range.c
new file mode 100644
index 000000000000..193b82745fd8
--- /dev/null
+++ b/tools/testing/selftests/net/ip_local_port_range.c
@@ -0,0 +1,463 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+// Copyright (c) 2023 Cloudflare
+
+/* Test IP_LOCAL_PORT_RANGE socket option: IPv4 + IPv6, TCP + UDP.
+ *
+ * Tests assume that net.ipv4.ip_local_port_range is [40000, 49999].
+ * Don't run these directly but with ip_local_port_range.sh script.
+ */
+
+#include <fcntl.h>
+#include <netinet/ip.h>
+
+#include "../kselftest_harness.h"
+
+#ifndef IP_LOCAL_PORT_RANGE
+#define IP_LOCAL_PORT_RANGE 51
+#endif
+
+#ifndef IPPROTO_MPTCP
+#define IPPROTO_MPTCP 262
+#endif
+
+static __u32 pack_port_range(__u16 lo, __u16 hi)
+{
+	return (hi << 16) | (lo << 0);
+}
+
+static void unpack_port_range(__u32 range, __u16 *lo, __u16 *hi)
+{
+	*lo = range & 0xffff;
+	*hi = range >> 16;
+}
+
+static int get_so_domain(int fd)
+{
+	int domain, err;
+	socklen_t len;
+
+	len = sizeof(domain);
+	err = getsockopt(fd, SOL_SOCKET, SO_DOMAIN, &domain, &len);
+	if (err)
+		return -1;
+
+	return domain;
+}
+
+static int bind_to_loopback_any_port(int fd)
+{
+	union {
+		struct sockaddr sa;
+		struct sockaddr_in v4;
+		struct sockaddr_in6 v6;
+	} addr;
+	socklen_t addr_len;
+
+	memset(&addr, 0, sizeof(addr));
+	switch (get_so_domain(fd)) {
+	case AF_INET:
+		addr.v4.sin_family = AF_INET;
+		addr.v4.sin_port = htons(0);
+		addr.v4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+		addr_len = sizeof(addr.v4);
+		break;
+	case AF_INET6:
+		addr.v6.sin6_family = AF_INET6;
+		addr.v6.sin6_port = htons(0);
+		addr.v6.sin6_addr = in6addr_loopback;
+		addr_len = sizeof(addr.v6);
+		break;
+	default:
+		return -1;
+	}
+
+	return bind(fd, &addr.sa, addr_len);
+}
+
+static int get_sock_port(int fd)
+{
+	union {
+		struct sockaddr sa;
+		struct sockaddr_in v4;
+		struct sockaddr_in6 v6;
+	} addr;
+	socklen_t addr_len;
+	int err;
+
+	addr_len = sizeof(addr);
+	memset(&addr, 0, sizeof(addr));
+	err = getsockname(fd, &addr.sa, &addr_len);
+	if (err)
+		return -1;
+
+	switch (addr.sa.sa_family) {
+	case AF_INET:
+		return ntohs(addr.v4.sin_port);
+	case AF_INET6:
+		return ntohs(addr.v6.sin6_port);
+	default:
+		errno = EAFNOSUPPORT;
+		return -1;
+	}
+}
+
+static int get_ip_local_port_range(int fd, __u32 *range)
+{
+	socklen_t len;
+	__u32 val;
+	int err;
+
+	len = sizeof(val);
+	err = getsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &val, &len);
+	if (err)
+		return -1;
+
+	*range = val;
+	return 0;
+}
+
+FIXTURE(ip_local_port_range) {};
+
+FIXTURE_SETUP(ip_local_port_range)
+{
+}
+
+FIXTURE_TEARDOWN(ip_local_port_range)
+{
+}
+
+FIXTURE_VARIANT(ip_local_port_range) {
+	int so_domain;
+	int so_type;
+	int so_protocol;
+};
+
+FIXTURE_VARIANT_ADD(ip_local_port_range, ip4_tcp) {
+	.so_domain	= AF_INET,
+	.so_type	= SOCK_STREAM,
+	.so_protocol	= 0,
+};
+
+FIXTURE_VARIANT_ADD(ip_local_port_range, ip4_udp) {
+	.so_domain	= AF_INET,
+	.so_type	= SOCK_DGRAM,
+	.so_protocol	= 0,
+};
+
+FIXTURE_VARIANT_ADD(ip_local_port_range, ip4_stcp) {
+	.so_domain	= AF_INET,
+	.so_type	= SOCK_STREAM,
+	.so_protocol	= IPPROTO_SCTP,
+};
+
+FIXTURE_VARIANT_ADD(ip_local_port_range, ip4_mptcp) {
+	.so_domain	= AF_INET,
+	.so_type	= SOCK_STREAM,
+	.so_protocol	= IPPROTO_MPTCP,
+};
+
+FIXTURE_VARIANT_ADD(ip_local_port_range, ip6_tcp) {
+	.so_domain	= AF_INET6,
+	.so_type	= SOCK_STREAM,
+	.so_protocol	= 0,
+};
+
+FIXTURE_VARIANT_ADD(ip_local_port_range, ip6_udp) {
+	.so_domain	= AF_INET6,
+	.so_type	= SOCK_DGRAM,
+	.so_protocol	= 0,
+};
+
+FIXTURE_VARIANT_ADD(ip_local_port_range, ip6_stcp) {
+	.so_domain	= AF_INET6,
+	.so_type	= SOCK_STREAM,
+	.so_protocol	= IPPROTO_SCTP,
+};
+
+FIXTURE_VARIANT_ADD(ip_local_port_range, ip6_mptcp) {
+	.so_domain	= AF_INET6,
+	.so_type	= SOCK_STREAM,
+	.so_protocol	= IPPROTO_MPTCP,
+};
+
+TEST_F(ip_local_port_range, invalid_option_value)
+{
+	__u16 val16;
+	__u32 val32;
+	__u64 val64;
+	int fd, err;
+
+	fd = socket(variant->so_domain, variant->so_type, variant->so_protocol);
+	ASSERT_GE(fd, 0) TH_LOG("socket failed");
+
+	/* Too few bytes */
+	val16 = 40000;
+	err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &val16, sizeof(val16));
+	EXPECT_TRUE(err) TH_LOG("expected setsockopt(IP_LOCAL_PORT_RANGE) to fail");
+	EXPECT_EQ(errno, EINVAL);
+
+	/* Empty range: low port > high port */
+	val32 = pack_port_range(40222, 40111);
+	err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &val32, sizeof(val32));
+	EXPECT_TRUE(err) TH_LOG("expected setsockopt(IP_LOCAL_PORT_RANGE) to fail");
+	EXPECT_EQ(errno, EINVAL);
+
+	/* Too many bytes */
+	val64 = pack_port_range(40333, 40444);
+	err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &val64, sizeof(val64));
+	EXPECT_TRUE(err) TH_LOG("expected setsockopt(IP_LOCAL_PORT_RANGE) to fail");
+	EXPECT_EQ(errno, EINVAL);
+
+	err = close(fd);
+	ASSERT_TRUE(!err) TH_LOG("close failed");
+}
+
+TEST_F(ip_local_port_range, port_range_out_of_netns_range)
+{
+	const struct test {
+		__u16 range_lo;
+		__u16 range_hi;
+	} tests[] = {
+		{ 30000, 39999 }, /* socket range below netns range */
+		{ 50000, 59999 }, /* socket range above netns range */
+	};
+	const struct test *t;
+
+	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+		/* Bind a couple of sockets, not just one, to check
+		 * that the range wasn't clamped to a single port from
+		 * the netns range. That is [40000, 40000] or [49999,
+		 * 49999], respectively for each test case.
+		 */
+		int fds[2], i;
+
+		TH_LOG("lo %5hu, hi %5hu", t->range_lo, t->range_hi);
+
+		for (i = 0; i < ARRAY_SIZE(fds); i++) {
+			int fd, err, port;
+			__u32 range;
+
+			fd = socket(variant->so_domain, variant->so_type, variant->so_protocol);
+			ASSERT_GE(fd, 0) TH_LOG("#%d: socket failed", i);
+
+			range = pack_port_range(t->range_lo, t->range_hi);
+			err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &range, sizeof(range));
+			ASSERT_TRUE(!err) TH_LOG("#%d: setsockopt(IP_LOCAL_PORT_RANGE) failed", i);
+
+			err = bind_to_loopback_any_port(fd);
+			ASSERT_TRUE(!err) TH_LOG("#%d: bind failed", i);
+
+			/* Check that socket port range outside of ephemeral range is ignored */
+			port = get_sock_port(fd);
+			ASSERT_GE(port, 40000) TH_LOG("#%d: expected port within netns range", i);
+			ASSERT_LE(port, 49999) TH_LOG("#%d: expected port within netns range", i);
+
+			fds[i] = fd;
+		}
+
+		for (i = 0; i < ARRAY_SIZE(fds); i++)
+			ASSERT_TRUE(close(fds[i]) == 0) TH_LOG("#%d: close failed", i);
+	}
+}
+
+TEST_F(ip_local_port_range, single_port_range)
+{
+	const struct test {
+		__u16 range_lo;
+		__u16 range_hi;
+		__u16 expected;
+	} tests[] = {
+		/* single port range within ephemeral range */
+		{ 45000, 45000, 45000 },
+		/* first port in the ephemeral range (clamp from above) */
+		{ 0, 40000, 40000 },
+		/* last port in the ephemeral range (clamp from below)  */
+		{ 49999, 0, 49999 },
+	};
+	const struct test *t;
+
+	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+		int fd, err, port;
+		__u32 range;
+
+		TH_LOG("lo %5hu, hi %5hu, expected %5hu",
+		       t->range_lo, t->range_hi, t->expected);
+
+		fd = socket(variant->so_domain, variant->so_type, variant->so_protocol);
+		ASSERT_GE(fd, 0) TH_LOG("socket failed");
+
+		range = pack_port_range(t->range_lo, t->range_hi);
+		err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &range, sizeof(range));
+		ASSERT_TRUE(!err) TH_LOG("setsockopt(IP_LOCAL_PORT_RANGE) failed");
+
+		err = bind_to_loopback_any_port(fd);
+		ASSERT_TRUE(!err) TH_LOG("bind failed");
+
+		port = get_sock_port(fd);
+		ASSERT_EQ(port, t->expected) TH_LOG("unexpected local port");
+
+		err = close(fd);
+		ASSERT_TRUE(!err) TH_LOG("close failed");
+	}
+}
+
+TEST_F(ip_local_port_range, exhaust_8_port_range)
+{
+	__u8 port_set = 0;
+	int i, fd, err;
+	__u32 range;
+	__u16 port;
+	int fds[8];
+
+	for (i = 0; i < ARRAY_SIZE(fds); i++) {
+		fd = socket(variant->so_domain, variant->so_type, variant->so_protocol);
+		ASSERT_GE(fd, 0) TH_LOG("socket failed");
+
+		range = pack_port_range(40000, 40007);
+		err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &range, sizeof(range));
+		ASSERT_TRUE(!err) TH_LOG("setsockopt(IP_LOCAL_PORT_RANGE) failed");
+
+		err = bind_to_loopback_any_port(fd);
+		ASSERT_TRUE(!err) TH_LOG("bind failed");
+
+		port = get_sock_port(fd);
+		ASSERT_GE(port, 40000) TH_LOG("expected port within sockopt range");
+		ASSERT_LE(port, 40007) TH_LOG("expected port within sockopt range");
+
+		port_set |= 1 << (port - 40000);
+		fds[i] = fd;
+	}
+
+	/* Check that all every port from the test range is in use */
+	ASSERT_EQ(port_set, 0xff) TH_LOG("expected all ports to be busy");
+
+	/* Check that bind() fails because the whole range is busy */
+	fd = socket(variant->so_domain, variant->so_type, variant->so_protocol);
+	ASSERT_GE(fd, 0) TH_LOG("socket failed");
+
+	range = pack_port_range(40000, 40007);
+	err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &range, sizeof(range));
+	ASSERT_TRUE(!err) TH_LOG("setsockopt(IP_LOCAL_PORT_RANGE) failed");
+
+	err = bind_to_loopback_any_port(fd);
+	ASSERT_TRUE(err) TH_LOG("expected bind to fail");
+	ASSERT_EQ(errno, EADDRINUSE);
+
+	err = close(fd);
+	ASSERT_TRUE(!err) TH_LOG("close failed");
+
+	for (i = 0; i < ARRAY_SIZE(fds); i++) {
+		err = close(fds[i]);
+		ASSERT_TRUE(!err) TH_LOG("close failed");
+	}
+}
+
+TEST_F(ip_local_port_range, late_bind)
+{
+	union {
+		struct sockaddr sa;
+		struct sockaddr_in v4;
+		struct sockaddr_in6 v6;
+	} addr;
+	socklen_t addr_len;
+	const int one = 1;
+	int fd, err;
+	__u32 range;
+	__u16 port;
+
+	fd = socket(variant->so_domain, variant->so_type, 0);
+	ASSERT_GE(fd, 0) TH_LOG("socket failed");
+
+	range = pack_port_range(40100, 40199);
+	err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &range, sizeof(range));
+	ASSERT_TRUE(!err) TH_LOG("setsockopt(IP_LOCAL_PORT_RANGE) failed");
+
+	err = setsockopt(fd, SOL_IP, IP_BIND_ADDRESS_NO_PORT, &one, sizeof(one));
+	ASSERT_TRUE(!err) TH_LOG("setsockopt(IP_BIND_ADDRESS_NO_PORT) failed");
+
+	err = bind_to_loopback_any_port(fd);
+	ASSERT_TRUE(!err) TH_LOG("bind failed");
+
+	port = get_sock_port(fd);
+	ASSERT_EQ(port, 0) TH_LOG("getsockname failed");
+
+	/* Invalid destination */
+	memset(&addr, 0, sizeof(addr));
+	switch (variant->so_domain) {
+	case AF_INET:
+		addr.v4.sin_family = AF_INET;
+		addr.v4.sin_port = htons(0);
+		addr.v4.sin_addr.s_addr = htonl(INADDR_ANY);
+		addr_len = sizeof(addr.v4);
+		break;
+	case AF_INET6:
+		addr.v6.sin6_family = AF_INET6;
+		addr.v6.sin6_port = htons(0);
+		addr.v6.sin6_addr = in6addr_any;
+		addr_len = sizeof(addr.v6);
+		break;
+	default:
+		ASSERT_TRUE(false) TH_LOG("unsupported socket domain");
+	}
+
+	/* connect() doesn't need to succeed for late bind to happen */
+	connect(fd, &addr.sa, addr_len);
+
+	port = get_sock_port(fd);
+	ASSERT_GE(port, 40100);
+	ASSERT_LE(port, 40199);
+
+	err = close(fd);
+	ASSERT_TRUE(!err) TH_LOG("close failed");
+}
+
+XFAIL_ADD(ip_local_port_range, ip4_stcp, late_bind);
+XFAIL_ADD(ip_local_port_range, ip6_stcp, late_bind);
+
+TEST_F(ip_local_port_range, get_port_range)
+{
+	__u16 lo, hi;
+	__u32 range;
+	int fd, err;
+
+	fd = socket(variant->so_domain, variant->so_type, variant->so_protocol);
+	ASSERT_GE(fd, 0) TH_LOG("socket failed");
+
+	/* Get range before it will be set */
+	err = get_ip_local_port_range(fd, &range);
+	ASSERT_TRUE(!err) TH_LOG("getsockopt(IP_LOCAL_PORT_RANGE) failed");
+
+	unpack_port_range(range, &lo, &hi);
+	ASSERT_EQ(lo, 0) TH_LOG("unexpected low port");
+	ASSERT_EQ(hi, 0) TH_LOG("unexpected high port");
+
+	range = pack_port_range(12345, 54321);
+	err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &range, sizeof(range));
+	ASSERT_TRUE(!err) TH_LOG("setsockopt(IP_LOCAL_PORT_RANGE) failed");
+
+	/* Get range after it has been set */
+	err = get_ip_local_port_range(fd, &range);
+	ASSERT_TRUE(!err) TH_LOG("getsockopt(IP_LOCAL_PORT_RANGE) failed");
+
+	unpack_port_range(range, &lo, &hi);
+	ASSERT_EQ(lo, 12345) TH_LOG("unexpected low port");
+	ASSERT_EQ(hi, 54321) TH_LOG("unexpected high port");
+
+	/* Unset the port range  */
+	range = pack_port_range(0, 0);
+	err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &range, sizeof(range));
+	ASSERT_TRUE(!err) TH_LOG("setsockopt(IP_LOCAL_PORT_RANGE) failed");
+
+	/* Get range after it has been unset */
+	err = get_ip_local_port_range(fd, &range);
+	ASSERT_TRUE(!err) TH_LOG("getsockopt(IP_LOCAL_PORT_RANGE) failed");
+
+	unpack_port_range(range, &lo, &hi);
+	ASSERT_EQ(lo, 0) TH_LOG("unexpected low port");
+	ASSERT_EQ(hi, 0) TH_LOG("unexpected high port");
+
+	err = close(fd);
+	ASSERT_TRUE(!err) TH_LOG("close failed");
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/ip_local_port_range.sh b/tools/testing/selftests/net/ip_local_port_range.sh
new file mode 100755
index 000000000000..6c6ad346eaa0
--- /dev/null
+++ b/tools/testing/selftests/net/ip_local_port_range.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+./in_netns.sh \
+  sh -c 'sysctl -q -w net.ipv4.ip_local_port_range="40000 49999" && ./ip_local_port_range'
diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c
index 17ced7d6ce25..be4a30a0d02a 100644
--- a/tools/testing/selftests/net/ipsec.c
+++ b/tools/testing/selftests/net/ipsec.c
@@ -41,7 +41,6 @@
 
 #define pr_err(fmt, ...)	printk(fmt ": %m", ##__VA_ARGS__)
 
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
 #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
 
 #define IPV4_STR_SZ	16	/* xxx.xxx.xxx.xxx is longest + \0 */
@@ -59,6 +58,8 @@
 #define VETH_FMT	"ktst-%d"
 #define VETH_LEN	12
 
+#define XFRM_ALGO_NR_KEYS 29
+
 static int nsfd_parent	= -1;
 static int nsfd_childa	= -1;
 static int nsfd_childb	= -1;
@@ -76,6 +77,43 @@ const unsigned int ping_timeout		= 300;
 const unsigned int ping_count		= 100;
 const unsigned int ping_success		= 80;
 
+struct xfrm_key_entry {
+	char algo_name[35];
+	int key_len;
+};
+
+struct xfrm_key_entry xfrm_key_entries[] = {
+	{"digest_null", 0},
+	{"ecb(cipher_null)", 0},
+	{"cbc(des)", 64},
+	{"hmac(md5)", 128},
+	{"cmac(aes)", 128},
+	{"xcbc(aes)", 128},
+	{"cbc(cast5)", 128},
+	{"cbc(serpent)", 128},
+	{"hmac(sha1)", 160},
+	{"hmac(rmd160)", 160},
+	{"cbc(des3_ede)", 192},
+	{"hmac(sha256)", 256},
+	{"cbc(aes)", 256},
+	{"cbc(camellia)", 256},
+	{"cbc(twofish)", 256},
+	{"rfc3686(ctr(aes))", 288},
+	{"hmac(sha384)", 384},
+	{"cbc(blowfish)", 448},
+	{"hmac(sha512)", 512},
+	{"rfc4106(gcm(aes))-128", 160},
+	{"rfc4543(gcm(aes))-128", 160},
+	{"rfc4309(ccm(aes))-128", 152},
+	{"rfc4106(gcm(aes))-192", 224},
+	{"rfc4543(gcm(aes))-192", 224},
+	{"rfc4309(ccm(aes))-192", 216},
+	{"rfc4106(gcm(aes))-256", 288},
+	{"rfc4543(gcm(aes))-256", 288},
+	{"rfc4309(ccm(aes))-256", 280},
+	{"rfc7539(chacha20,poly1305)-128", 0}
+};
+
 static void randomize_buffer(void *buf, size_t buflen)
 {
 	int *p = (int *)buf;
@@ -484,13 +522,16 @@ enum desc_type {
 	MONITOR_ACQUIRE,
 	EXPIRE_STATE,
 	EXPIRE_POLICY,
+	SPDINFO_ATTRS,
 };
 const char *desc_name[] = {
 	"create tunnel",
 	"alloc spi",
 	"monitor acquire",
 	"expire state",
-	"expire policy"
+	"expire policy",
+	"spdinfo attributes",
+	""
 };
 struct xfrm_desc {
 	enum desc_type	type;
@@ -765,65 +806,12 @@ static int do_ping(int cmd_fd, char *buf, size_t buf_len, struct in_addr from,
 static int xfrm_fill_key(char *name, char *buf,
 		size_t buf_len, unsigned int *key_len)
 {
-	/* TODO: use set/map instead */
-	if (strncmp(name, "digest_null", ALGO_LEN) == 0)
-		*key_len = 0;
-	else if (strncmp(name, "ecb(cipher_null)", ALGO_LEN) == 0)
-		*key_len = 0;
-	else if (strncmp(name, "cbc(des)", ALGO_LEN) == 0)
-		*key_len = 64;
-	else if (strncmp(name, "hmac(md5)", ALGO_LEN) == 0)
-		*key_len = 128;
-	else if (strncmp(name, "cmac(aes)", ALGO_LEN) == 0)
-		*key_len = 128;
-	else if (strncmp(name, "xcbc(aes)", ALGO_LEN) == 0)
-		*key_len = 128;
-	else if (strncmp(name, "cbc(cast5)", ALGO_LEN) == 0)
-		*key_len = 128;
-	else if (strncmp(name, "cbc(serpent)", ALGO_LEN) == 0)
-		*key_len = 128;
-	else if (strncmp(name, "hmac(sha1)", ALGO_LEN) == 0)
-		*key_len = 160;
-	else if (strncmp(name, "hmac(rmd160)", ALGO_LEN) == 0)
-		*key_len = 160;
-	else if (strncmp(name, "cbc(des3_ede)", ALGO_LEN) == 0)
-		*key_len = 192;
-	else if (strncmp(name, "hmac(sha256)", ALGO_LEN) == 0)
-		*key_len = 256;
-	else if (strncmp(name, "cbc(aes)", ALGO_LEN) == 0)
-		*key_len = 256;
-	else if (strncmp(name, "cbc(camellia)", ALGO_LEN) == 0)
-		*key_len = 256;
-	else if (strncmp(name, "cbc(twofish)", ALGO_LEN) == 0)
-		*key_len = 256;
-	else if (strncmp(name, "rfc3686(ctr(aes))", ALGO_LEN) == 0)
-		*key_len = 288;
-	else if (strncmp(name, "hmac(sha384)", ALGO_LEN) == 0)
-		*key_len = 384;
-	else if (strncmp(name, "cbc(blowfish)", ALGO_LEN) == 0)
-		*key_len = 448;
-	else if (strncmp(name, "hmac(sha512)", ALGO_LEN) == 0)
-		*key_len = 512;
-	else if (strncmp(name, "rfc4106(gcm(aes))-128", ALGO_LEN) == 0)
-		*key_len = 160;
-	else if (strncmp(name, "rfc4543(gcm(aes))-128", ALGO_LEN) == 0)
-		*key_len = 160;
-	else if (strncmp(name, "rfc4309(ccm(aes))-128", ALGO_LEN) == 0)
-		*key_len = 152;
-	else if (strncmp(name, "rfc4106(gcm(aes))-192", ALGO_LEN) == 0)
-		*key_len = 224;
-	else if (strncmp(name, "rfc4543(gcm(aes))-192", ALGO_LEN) == 0)
-		*key_len = 224;
-	else if (strncmp(name, "rfc4309(ccm(aes))-192", ALGO_LEN) == 0)
-		*key_len = 216;
-	else if (strncmp(name, "rfc4106(gcm(aes))-256", ALGO_LEN) == 0)
-		*key_len = 288;
-	else if (strncmp(name, "rfc4543(gcm(aes))-256", ALGO_LEN) == 0)
-		*key_len = 288;
-	else if (strncmp(name, "rfc4309(ccm(aes))-256", ALGO_LEN) == 0)
-		*key_len = 280;
-	else if (strncmp(name, "rfc7539(chacha20,poly1305)-128", ALGO_LEN) == 0)
-		*key_len = 0;
+	int i;
+
+	for (i = 0; i < XFRM_ALGO_NR_KEYS; i++) {
+		if (strncmp(name, xfrm_key_entries[i].algo_name, ALGO_LEN) == 0)
+			*key_len = xfrm_key_entries[i].key_len;
+	}
 
 	if (*key_len > buf_len) {
 		printk("Can't pack a key - too big for buffer");
@@ -1593,6 +1581,155 @@ out_close:
 	return ret;
 }
 
+static int xfrm_spdinfo_set_thresh(int xfrm_sock, uint32_t *seq,
+		unsigned thresh4_l, unsigned thresh4_r,
+		unsigned thresh6_l, unsigned thresh6_r,
+		bool add_bad_attr)
+
+{
+	struct {
+		struct nlmsghdr		nh;
+		union {
+			uint32_t	unused;
+			int		error;
+		};
+		char			attrbuf[MAX_PAYLOAD];
+	} req;
+	struct xfrmu_spdhthresh thresh;
+
+	memset(&req, 0, sizeof(req));
+	req.nh.nlmsg_len	= NLMSG_LENGTH(sizeof(req.unused));
+	req.nh.nlmsg_type	= XFRM_MSG_NEWSPDINFO;
+	req.nh.nlmsg_flags	= NLM_F_REQUEST | NLM_F_ACK;
+	req.nh.nlmsg_seq	= (*seq)++;
+
+	thresh.lbits = thresh4_l;
+	thresh.rbits = thresh4_r;
+	if (rtattr_pack(&req.nh, sizeof(req), XFRMA_SPD_IPV4_HTHRESH, &thresh, sizeof(thresh)))
+		return -1;
+
+	thresh.lbits = thresh6_l;
+	thresh.rbits = thresh6_r;
+	if (rtattr_pack(&req.nh, sizeof(req), XFRMA_SPD_IPV6_HTHRESH, &thresh, sizeof(thresh)))
+		return -1;
+
+	if (add_bad_attr) {
+		BUILD_BUG_ON(XFRMA_IF_ID <= XFRMA_SPD_MAX + 1);
+		if (rtattr_pack(&req.nh, sizeof(req), XFRMA_IF_ID, NULL, 0)) {
+			pr_err("adding attribute failed: no space");
+			return -1;
+		}
+	}
+
+	if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+		pr_err("send()");
+		return -1;
+	}
+
+	if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) {
+		pr_err("recv()");
+		return -1;
+	} else if (req.nh.nlmsg_type != NLMSG_ERROR) {
+		printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type);
+		return -1;
+	}
+
+	if (req.error) {
+		printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error));
+		return -1;
+	}
+
+	return 0;
+}
+
+static int xfrm_spdinfo_attrs(int xfrm_sock, uint32_t *seq)
+{
+	struct {
+		struct nlmsghdr			nh;
+		union {
+			uint32_t	unused;
+			int		error;
+		};
+		char			attrbuf[MAX_PAYLOAD];
+	} req;
+
+	if (xfrm_spdinfo_set_thresh(xfrm_sock, seq, 32, 31, 120, 16, false)) {
+		pr_err("Can't set SPD HTHRESH");
+		return KSFT_FAIL;
+	}
+
+	memset(&req, 0, sizeof(req));
+
+	req.nh.nlmsg_len	= NLMSG_LENGTH(sizeof(req.unused));
+	req.nh.nlmsg_type	= XFRM_MSG_GETSPDINFO;
+	req.nh.nlmsg_flags	= NLM_F_REQUEST;
+	req.nh.nlmsg_seq	= (*seq)++;
+	if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+		pr_err("send()");
+		return KSFT_FAIL;
+	}
+
+	if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) {
+		pr_err("recv()");
+		return KSFT_FAIL;
+	} else if (req.nh.nlmsg_type == XFRM_MSG_NEWSPDINFO) {
+		size_t len = NLMSG_PAYLOAD(&req.nh, sizeof(req.unused));
+		struct rtattr *attr = (void *)req.attrbuf;
+		int got_thresh = 0;
+
+		for (; RTA_OK(attr, len); attr = RTA_NEXT(attr, len)) {
+			if (attr->rta_type == XFRMA_SPD_IPV4_HTHRESH) {
+				struct xfrmu_spdhthresh *t = RTA_DATA(attr);
+
+				got_thresh++;
+				if (t->lbits != 32 || t->rbits != 31) {
+					pr_err("thresh differ: %u, %u",
+							t->lbits, t->rbits);
+					return KSFT_FAIL;
+				}
+			}
+			if (attr->rta_type == XFRMA_SPD_IPV6_HTHRESH) {
+				struct xfrmu_spdhthresh *t = RTA_DATA(attr);
+
+				got_thresh++;
+				if (t->lbits != 120 || t->rbits != 16) {
+					pr_err("thresh differ: %u, %u",
+							t->lbits, t->rbits);
+					return KSFT_FAIL;
+				}
+			}
+		}
+		if (got_thresh != 2) {
+			pr_err("only %d thresh returned by XFRM_MSG_GETSPDINFO", got_thresh);
+			return KSFT_FAIL;
+		}
+	} else if (req.nh.nlmsg_type != NLMSG_ERROR) {
+		printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type);
+		return KSFT_FAIL;
+	} else {
+		printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error));
+		return -1;
+	}
+
+	/* Restore the default */
+	if (xfrm_spdinfo_set_thresh(xfrm_sock, seq, 32, 32, 128, 128, false)) {
+		pr_err("Can't restore SPD HTHRESH");
+		return KSFT_FAIL;
+	}
+
+	/*
+	 * At this moment xfrm uses nlmsg_parse_deprecated(), which
+	 * implies NL_VALIDATE_LIBERAL - ignoring attributes with
+	 * (type > maxtype). nla_parse_depricated_strict() would enforce
+	 * it. Or even stricter nla_parse().
+	 * Right now it's not expected to fail, but to be ignored.
+	 */
+	if (xfrm_spdinfo_set_thresh(xfrm_sock, seq, 32, 32, 128, 128, true))
+		return KSFT_PASS;
+
+	return KSFT_PASS;
+}
+
 static int child_serv(int xfrm_sock, uint32_t *seq,
 		unsigned int nr, int cmd_fd, void *buf, struct xfrm_desc *desc)
 {
@@ -1717,6 +1854,9 @@ static int child_f(unsigned int nr, int test_desc_fd, int cmd_fd, void *buf)
 		case EXPIRE_POLICY:
 			ret = xfrm_expire_policy(xfrm_sock, &seq, nr, &desc);
 			break;
+		case SPDINFO_ATTRS:
+			ret = xfrm_spdinfo_attrs(xfrm_sock, &seq);
+			break;
 		default:
 			printk("Unknown desc type %d", desc.type);
 			exit(KSFT_FAIL);
@@ -1785,7 +1925,7 @@ static void grand_child_serv(unsigned int nr, int cmd_fd, void *buf,
 		break;
 	default:
 		printk("got unknown msg type %d", msg->type);
-	};
+	}
 }
 
 static int grand_child_f(unsigned int nr, int cmd_fd, void *buf)
@@ -1994,8 +2134,10 @@ static int write_proto_plan(int fd, int proto)
  *   sizeof(xfrm_user_polexpire)  = 168  |  sizeof(xfrm_user_polexpire)  = 176
  *
  * Check the affected by the UABI difference structures.
+ * Also, check translation for xfrm_set_spdinfo: it has it's own attributes
+ * which needs to be correctly copied, but not translated.
  */
-const unsigned int compat_plan = 4;
+const unsigned int compat_plan = 5;
 static int write_compat_struct_tests(int test_desc_fd)
 {
 	struct xfrm_desc desc = {};
@@ -2019,6 +2161,10 @@ static int write_compat_struct_tests(int test_desc_fd)
 	if (__write_desc(test_desc_fd, &desc))
 		return -1;
 
+	desc.type = SPDINFO_ATTRS;
+	if (__write_desc(test_desc_fd, &desc))
+		return -1;
+
 	return 0;
 }
 
@@ -2117,7 +2263,7 @@ static int check_results(void)
 
 int main(int argc, char **argv)
 {
-	unsigned int nr_process = 1;
+	long nr_process = 1;
 	int route_sock = -1, ret = KSFT_SKIP;
 	int test_desc_fd[2];
 	uint32_t route_seq;
@@ -2138,7 +2284,7 @@ int main(int argc, char **argv)
 			exit_usage(argv);
 		}
 
-		if (nr_process > MAX_PROCESSES || !nr_process) {
+		if (nr_process > MAX_PROCESSES || nr_process < 1) {
 			printk("nr_process should be between [1; %u]",
 					MAX_PROCESSES);
 			exit_usage(argv);
diff --git a/tools/testing/selftests/net/ipv6_flowlabel.c b/tools/testing/selftests/net/ipv6_flowlabel.c
index a7c41375374f..708a9822259d 100644
--- a/tools/testing/selftests/net/ipv6_flowlabel.c
+++ b/tools/testing/selftests/net/ipv6_flowlabel.c
@@ -9,6 +9,7 @@
 #include <errno.h>
 #include <fcntl.h>
 #include <limits.h>
+#include <linux/icmpv6.h>
 #include <linux/in6.h>
 #include <stdbool.h>
 #include <stdio.h>
@@ -29,26 +30,48 @@
 #ifndef IPV6_FLOWLABEL_MGR
 #define IPV6_FLOWLABEL_MGR 32
 #endif
+#ifndef IPV6_FLOWINFO_SEND
+#define IPV6_FLOWINFO_SEND 33
+#endif
 
 #define FLOWLABEL_WILDCARD	((uint32_t) -1)
 
 static const char cfg_data[]	= "a";
 static uint32_t cfg_label	= 1;
+static bool use_ping;
+static bool use_flowinfo_send;
+
+static struct icmp6hdr icmp6 = {
+	.icmp6_type = ICMPV6_ECHO_REQUEST
+};
+
+static struct sockaddr_in6 addr = {
+	.sin6_family = AF_INET6,
+	.sin6_addr = IN6ADDR_LOOPBACK_INIT,
+};
 
 static void do_send(int fd, bool with_flowlabel, uint32_t flowlabel)
 {
 	char control[CMSG_SPACE(sizeof(flowlabel))] = {0};
 	struct msghdr msg = {0};
-	struct iovec iov = {0};
+	struct iovec iov = {
+		.iov_base = (char *)cfg_data,
+		.iov_len = sizeof(cfg_data)
+	};
 	int ret;
 
-	iov.iov_base = (char *)cfg_data;
-	iov.iov_len = sizeof(cfg_data);
+	if (use_ping) {
+		iov.iov_base = &icmp6;
+		iov.iov_len = sizeof(icmp6);
+	}
 
 	msg.msg_iov = &iov;
 	msg.msg_iovlen = 1;
 
-	if (with_flowlabel) {
+	if (use_flowinfo_send) {
+		msg.msg_name = &addr;
+		msg.msg_namelen = sizeof(addr);
+	} else if (with_flowlabel) {
 		struct cmsghdr *cm;
 
 		cm = (void *)control;
@@ -94,6 +117,8 @@ static void do_recv(int fd, bool with_flowlabel, uint32_t expect)
 	ret = recvmsg(fd, &msg, 0);
 	if (ret == -1)
 		error(1, errno, "recv");
+	if (use_ping)
+		goto parse_cmsg;
 	if (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))
 		error(1, 0, "recv: truncated");
 	if (ret != sizeof(cfg_data))
@@ -101,6 +126,7 @@ static void do_recv(int fd, bool with_flowlabel, uint32_t expect)
 	if (memcmp(data, cfg_data, sizeof(data)))
 		error(1, 0, "recv: data mismatch");
 
+parse_cmsg:
 	cm = CMSG_FIRSTHDR(&msg);
 	if (with_flowlabel) {
 		if (!cm)
@@ -114,9 +140,11 @@ static void do_recv(int fd, bool with_flowlabel, uint32_t expect)
 		flowlabel = ntohl(*(uint32_t *)CMSG_DATA(cm));
 		fprintf(stderr, "recv with label %u\n", flowlabel);
 
-		if (expect != FLOWLABEL_WILDCARD && expect != flowlabel)
+		if (expect != FLOWLABEL_WILDCARD && expect != flowlabel) {
 			fprintf(stderr, "recv: incorrect flowlabel %u != %u\n",
 					flowlabel, expect);
+			error(1, 0, "recv: flowlabel is wrong");
+		}
 
 	} else {
 		fprintf(stderr, "recv without label\n");
@@ -165,11 +193,17 @@ static void parse_opts(int argc, char **argv)
 {
 	int c;
 
-	while ((c = getopt(argc, argv, "l:")) != -1) {
+	while ((c = getopt(argc, argv, "l:ps")) != -1) {
 		switch (c) {
 		case 'l':
 			cfg_label = strtoul(optarg, NULL, 0);
 			break;
+		case 'p':
+			use_ping = true;
+			break;
+		case 's':
+			use_flowinfo_send = true;
+			break;
 		default:
 			error(1, 0, "%s: parse error", argv[0]);
 		}
@@ -178,27 +212,30 @@ static void parse_opts(int argc, char **argv)
 
 int main(int argc, char **argv)
 {
-	struct sockaddr_in6 addr = {
-		.sin6_family = AF_INET6,
-		.sin6_port = htons(8000),
-		.sin6_addr = IN6ADDR_LOOPBACK_INIT,
-	};
 	const int one = 1;
 	int fdt, fdr;
+	int prot = 0;
+
+	addr.sin6_port = htons(8000);
 
 	parse_opts(argc, argv);
 
-	fdt = socket(PF_INET6, SOCK_DGRAM, 0);
+	if (use_ping) {
+		fprintf(stderr, "attempting to use ping sockets\n");
+		prot = IPPROTO_ICMPV6;
+	}
+
+	fdt = socket(PF_INET6, SOCK_DGRAM, prot);
 	if (fdt == -1)
 		error(1, errno, "socket t");
 
-	fdr = socket(PF_INET6, SOCK_DGRAM, 0);
+	fdr = use_ping ? fdt : socket(PF_INET6, SOCK_DGRAM, 0);
 	if (fdr == -1)
 		error(1, errno, "socket r");
 
 	if (connect(fdt, (void *)&addr, sizeof(addr)))
 		error(1, errno, "connect");
-	if (bind(fdr, (void *)&addr, sizeof(addr)))
+	if (!use_ping && bind(fdr, (void *)&addr, sizeof(addr)))
 		error(1, errno, "bind");
 
 	flowlabel_get(fdt, cfg_label, IPV6_FL_S_EXCL, IPV6_FL_F_CREATE);
@@ -216,13 +253,21 @@ int main(int argc, char **argv)
 		do_recv(fdr, false, 0);
 	}
 
+	if (use_flowinfo_send) {
+		fprintf(stderr, "using IPV6_FLOWINFO_SEND to send label\n");
+		addr.sin6_flowinfo = htonl(cfg_label);
+		if (setsockopt(fdt, SOL_IPV6, IPV6_FLOWINFO_SEND, &one,
+			       sizeof(one)) == -1)
+			error(1, errno, "setsockopt flowinfo_send");
+	}
+
 	fprintf(stderr, "send label\n");
 	do_send(fdt, true, cfg_label);
 	do_recv(fdr, true, cfg_label);
 
 	if (close(fdr))
 		error(1, errno, "close r");
-	if (close(fdt))
+	if (!use_ping && close(fdt))
 		error(1, errno, "close t");
 
 	return 0;
diff --git a/tools/testing/selftests/net/ipv6_flowlabel.sh b/tools/testing/selftests/net/ipv6_flowlabel.sh
index d3bc6442704e..cee95e252bee 100755
--- a/tools/testing/selftests/net/ipv6_flowlabel.sh
+++ b/tools/testing/selftests/net/ipv6_flowlabel.sh
@@ -18,4 +18,20 @@ echo "TEST datapath (with auto-flowlabels)"
 ./in_netns.sh \
   sh -c 'sysctl -q -w net.ipv6.auto_flowlabels=1 && ./ipv6_flowlabel -l 1'
 
+echo "TEST datapath (with ping-sockets)"
+./in_netns.sh \
+  sh -c 'sysctl -q -w net.ipv6.flowlabel_reflect=4 && \
+    sysctl -q -w net.ipv4.ping_group_range="0 2147483647" && \
+    ./ipv6_flowlabel -l 1 -p'
+
+echo "TEST datapath (with flowinfo-send)"
+./in_netns.sh \
+  sh -c './ipv6_flowlabel -l 1 -s'
+
+echo "TEST datapath (with ping-sockets flowinfo-send)"
+./in_netns.sh \
+  sh -c 'sysctl -q -w net.ipv6.flowlabel_reflect=4 && \
+    sysctl -q -w net.ipv4.ping_group_range="0 2147483647" && \
+    ./ipv6_flowlabel -l 1 -p -s'
+
 echo OK. All tests passed
diff --git a/tools/testing/selftests/net/l2_tos_ttl_inherit.sh b/tools/testing/selftests/net/l2_tos_ttl_inherit.sh
new file mode 100755
index 000000000000..f11756e7df2f
--- /dev/null
+++ b/tools/testing/selftests/net/l2_tos_ttl_inherit.sh
@@ -0,0 +1,446 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+# Author: Matthias May <matthias.may@westermo.com>
+#
+# This script evaluates ip tunnels that are capable of carrying L2 traffic
+# if they inherit or set the inheritable fields.
+# Namely these tunnels are: 'gretap', 'vxlan' and 'geneve'.
+# Checked inheritable fields are: TOS and TTL.
+# The outer tunnel protocol of 'IPv4' or 'IPv6' is verified-
+# As payload frames of type 'IPv4', 'IPv6' and 'other'(ARP) are verified.
+# In addition this script also checks if forcing a specific field in the
+# outer header is working.
+
+# Return 4 by default (Kselftest SKIP code)
+ERR=4
+
+if [ "$(id -u)" != "0" ]; then
+	echo "Please run as root."
+	exit $ERR
+fi
+if ! which tcpdump > /dev/null 2>&1; then
+	echo "No tcpdump found. Required for this test."
+	exit $ERR
+fi
+
+expected_tos="0x00"
+expected_ttl="0"
+failed=false
+
+readonly NS0=$(mktemp -u ns0-XXXXXXXX)
+readonly NS1=$(mktemp -u ns1-XXXXXXXX)
+
+RUN_NS0="ip netns exec ${NS0}"
+
+get_random_tos() {
+	# Get a random hex tos value between 0x00 and 0xfc, a multiple of 4
+	echo "0x$(tr -dc '0-9a-f' < /dev/urandom | head -c 1)\
+$(tr -dc '048c' < /dev/urandom | head -c 1)"
+}
+get_random_ttl() {
+	# Get a random dec value between 0 and 255
+	printf "%d" "0x$(tr -dc '0-9a-f' < /dev/urandom | head -c 2)"
+}
+get_field() {
+	# Expects to get the 'head -n 1' of a captured frame by tcpdump.
+	# Parses this first line and returns the specified field.
+	local field="$1"
+	local input="$2"
+	local found=false
+	input="$(echo "$input" | tr -d '(),')"
+	for input_field in $input; do
+		if $found; then
+			echo "$input_field"
+			return
+		fi
+		# The next field that we iterate over is the looked for value
+		if [ "$input_field" = "$field" ]; then
+			found=true
+		fi
+	done
+	echo "0"
+}
+setup() {
+	local type="$1"
+	local outer="$2"
+	local inner="$3"
+	local tos_ttl="$4"
+	local vlan="$5"
+	local test_tos="0x00"
+	local test_ttl="0"
+
+	# We don't want a test-tos of 0x00,
+	# because this is the value that we get when no tos is set.
+	expected_tos="$(get_random_tos)"
+	while [ "$expected_tos" = "0x00" ]; do
+		expected_tos="$(get_random_tos)"
+	done
+	if [ "$tos_ttl" = "random" ]; then
+		test_tos="$expected_tos"
+		tos="fixed $test_tos"
+	elif [ "$tos_ttl" = "inherit" ]; then
+		test_tos="$tos_ttl"
+		tos="inherit $expected_tos"
+	fi
+
+	# We don't want a test-ttl of 64 or 0,
+	# because 64 is when no ttl is set and 0 is not a valid ttl.
+	expected_ttl="$(get_random_ttl)"
+	while [ "$expected_ttl" = "64" ] || [ "$expected_ttl" = "0" ]; do
+		expected_ttl="$(get_random_ttl)"
+	done
+
+	if [ "$tos_ttl" = "random" ]; then
+		test_ttl="$expected_ttl"
+		ttl="fixed $test_ttl"
+	elif [ "$tos_ttl" = "inherit" ]; then
+		test_ttl="$tos_ttl"
+		ttl="inherit $expected_ttl"
+	fi
+	printf "│%7s │%6s │%6s │%13s │%13s │%6s │" \
+	"$type" "$outer" "$inner" "$tos" "$ttl" "$vlan"
+
+	# Create netns NS0 and NS1 and connect them with a veth pair
+	ip netns add "${NS0}"
+	ip netns add "${NS1}"
+	ip link add name veth0 netns "${NS0}" type veth \
+		peer name veth1 netns "${NS1}"
+	ip -netns "${NS0}" link set dev veth0 up
+	ip -netns "${NS1}" link set dev veth1 up
+	ip -netns "${NS0}" address flush dev veth0
+	ip -netns "${NS1}" address flush dev veth1
+
+	local local_addr1=""
+	local local_addr2=""
+	if [ "$type" = "gre" ] || [ "$type" = "vxlan" ]; then
+		if [ "$outer" = "4" ]; then
+			local_addr1="local 198.18.0.1"
+			local_addr2="local 198.18.0.2"
+		elif [ "$outer" = "6" ]; then
+			local_addr1="local fdd1:ced0:5d88:3fce::1"
+			local_addr2="local fdd1:ced0:5d88:3fce::2"
+		fi
+	fi
+	local vxlan=""
+	if [ "$type" = "vxlan" ]; then
+		vxlan="vni 100 dstport 4789"
+	fi
+	local geneve=""
+	if [ "$type" = "geneve" ]; then
+		geneve="vni 100"
+	fi
+	# Create tunnel and assign outer IPv4/IPv6 addresses
+	if [ "$outer" = "4" ]; then
+		if [ "$type" = "gre" ]; then
+			type="gretap"
+		fi
+		ip -netns "${NS0}" address add 198.18.0.1/24 dev veth0
+		ip -netns "${NS1}" address add 198.18.0.2/24 dev veth1
+		ip -netns "${NS0}" link add name tep0 type $type $local_addr1 \
+			remote 198.18.0.2 tos $test_tos ttl $test_ttl         \
+			$vxlan $geneve
+		ip -netns "${NS1}" link add name tep1 type $type $local_addr2 \
+			remote 198.18.0.1 tos $test_tos ttl $test_ttl         \
+			$vxlan $geneve
+	elif [ "$outer" = "6" ]; then
+		if [ "$type" = "gre" ]; then
+			type="ip6gretap"
+		fi
+		ip -netns "${NS0}" address add fdd1:ced0:5d88:3fce::1/64 \
+			dev veth0 nodad
+		ip -netns "${NS1}" address add fdd1:ced0:5d88:3fce::2/64 \
+			dev veth1 nodad
+		ip -netns "${NS0}" link add name tep0 type $type $local_addr1 \
+			remote fdd1:ced0:5d88:3fce::2 tos $test_tos           \
+			ttl $test_ttl $vxlan $geneve
+		ip -netns "${NS1}" link add name tep1 type $type $local_addr2 \
+			remote fdd1:ced0:5d88:3fce::1 tos $test_tos           \
+			ttl $test_ttl $vxlan $geneve
+	fi
+
+	# Bring L2-tunnel link up and create VLAN on top
+	ip -netns "${NS0}" link set tep0 up
+	ip -netns "${NS1}" link set tep1 up
+	ip -netns "${NS0}" address flush dev tep0
+	ip -netns "${NS1}" address flush dev tep1
+	local parent
+	if $vlan; then
+		parent="vlan99-"
+		ip -netns "${NS0}" link add link tep0 name ${parent}0 \
+			type vlan id 99
+		ip -netns "${NS1}" link add link tep1 name ${parent}1 \
+			type vlan id 99
+		ip -netns "${NS0}" link set dev ${parent}0 up
+		ip -netns "${NS1}" link set dev ${parent}1 up
+		ip -netns "${NS0}" address flush dev ${parent}0
+		ip -netns "${NS1}" address flush dev ${parent}1
+	else
+		parent="tep"
+	fi
+
+	# Assign inner IPv4/IPv6 addresses
+	if [ "$inner" = "4" ] || [ "$inner" = "other" ]; then
+		ip -netns "${NS0}" address add 198.19.0.1/24 brd + dev ${parent}0
+		ip -netns "${NS1}" address add 198.19.0.2/24 brd + dev ${parent}1
+	elif [ "$inner" = "6" ]; then
+		ip -netns "${NS0}" address add fdd4:96cf:4eae:443b::1/64 \
+			dev ${parent}0 nodad
+		ip -netns "${NS1}" address add fdd4:96cf:4eae:443b::2/64 \
+			dev ${parent}1 nodad
+	fi
+}
+
+verify() {
+	local outer="$1"
+	local inner="$2"
+	local tos_ttl="$3"
+	local vlan="$4"
+
+	local ping_pid out captured_tos captured_ttl result
+
+	local ping_dst
+	if [ "$inner" = "4" ]; then
+		ping_dst="198.19.0.2"
+	elif [ "$inner" = "6" ]; then
+		ping_dst="fdd4:96cf:4eae:443b::2"
+	elif [ "$inner" = "other" ]; then
+		ping_dst="198.19.0.3" # Generates ARPs which are not IPv4/IPv6
+	fi
+	if [ "$tos_ttl" = "inherit" ]; then
+		${RUN_NS0} ping -i 0.1 $ping_dst -Q "$expected_tos"          \
+			 -t "$expected_ttl" 2>/dev/null 1>&2 & ping_pid="$!"
+	else
+		${RUN_NS0} ping -i 0.1 $ping_dst 2>/dev/null 1>&2 & ping_pid="$!"
+	fi
+	local tunnel_type_offset tunnel_type_proto req_proto_offset req_offset
+	if [ "$type" = "gre" ]; then
+		tunnel_type_proto="0x2f"
+	elif [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then
+		tunnel_type_proto="0x11"
+	fi
+	if [ "$outer" = "4" ]; then
+		tunnel_type_offset="9"
+		if [ "$inner" = "4" ]; then
+			req_proto_offset="47"
+			req_offset="58"
+			if [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then
+				req_proto_offset="$((req_proto_offset + 12))"
+				req_offset="$((req_offset + 12))"
+			fi
+			if $vlan; then
+				req_proto_offset="$((req_proto_offset + 4))"
+				req_offset="$((req_offset + 4))"
+			fi
+			out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \
+				-i veth0 -n                                   \
+				ip[$tunnel_type_offset] = $tunnel_type_proto and \
+				ip[$req_proto_offset] = 0x01 and              \
+				ip[$req_offset] = 0x08 2>/dev/null            \
+				| head -n 1)"
+		elif [ "$inner" = "6" ]; then
+			req_proto_offset="44"
+			req_offset="78"
+			if [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then
+				req_proto_offset="$((req_proto_offset + 12))"
+				req_offset="$((req_offset + 12))"
+			fi
+			if $vlan; then
+				req_proto_offset="$((req_proto_offset + 4))"
+				req_offset="$((req_offset + 4))"
+			fi
+			out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \
+				-i veth0 -n                                   \
+				ip[$tunnel_type_offset] = $tunnel_type_proto and \
+				ip[$req_proto_offset] = 0x3a and              \
+				ip[$req_offset] = 0x80 2>/dev/null            \
+				| head -n 1)"
+		elif [ "$inner" = "other" ]; then
+			req_proto_offset="36"
+			req_offset="45"
+			if [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then
+				req_proto_offset="$((req_proto_offset + 12))"
+				req_offset="$((req_offset + 12))"
+			fi
+			if $vlan; then
+				req_proto_offset="$((req_proto_offset + 4))"
+				req_offset="$((req_offset + 4))"
+			fi
+			if [ "$tos_ttl" = "inherit" ]; then
+				expected_tos="0x00"
+				expected_ttl="64"
+			fi
+			out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \
+				-i veth0 -n                                   \
+				ip[$tunnel_type_offset] = $tunnel_type_proto and \
+				ip[$req_proto_offset] = 0x08 and              \
+				ip[$((req_proto_offset + 1))] = 0x06 and      \
+				ip[$req_offset] = 0x01 2>/dev/null            \
+				| head -n 1)"
+		fi
+	elif [ "$outer" = "6" ]; then
+		if [ "$type" = "gre" ]; then
+			tunnel_type_offset="40"
+		elif [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then
+			tunnel_type_offset="6"
+		fi
+		if [ "$inner" = "4" ]; then
+			local req_proto_offset="75"
+			local req_offset="86"
+			if [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then
+				req_proto_offset="$((req_proto_offset + 4))"
+				req_offset="$((req_offset + 4))"
+			fi
+			if $vlan; then
+				req_proto_offset="$((req_proto_offset + 4))"
+				req_offset="$((req_offset + 4))"
+			fi
+			out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \
+				-i veth0 -n                                   \
+				ip6[$tunnel_type_offset] = $tunnel_type_proto and \
+				ip6[$req_proto_offset] = 0x01 and             \
+				ip6[$req_offset] = 0x08 2>/dev/null           \
+				| head -n 1)"
+		elif [ "$inner" = "6" ]; then
+			local req_proto_offset="72"
+			local req_offset="106"
+			if [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then
+				req_proto_offset="$((req_proto_offset + 4))"
+				req_offset="$((req_offset + 4))"
+			fi
+			if $vlan; then
+				req_proto_offset="$((req_proto_offset + 4))"
+				req_offset="$((req_offset + 4))"
+			fi
+			out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \
+				-i veth0 -n                                   \
+				ip6[$tunnel_type_offset] = $tunnel_type_proto and \
+				ip6[$req_proto_offset] = 0x3a and             \
+				ip6[$req_offset] = 0x80 2>/dev/null           \
+				| head -n 1)"
+		elif [ "$inner" = "other" ]; then
+			local req_proto_offset="64"
+			local req_offset="73"
+			if [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then
+				req_proto_offset="$((req_proto_offset + 4))"
+				req_offset="$((req_offset + 4))"
+			fi
+			if $vlan; then
+				req_proto_offset="$((req_proto_offset + 4))"
+				req_offset="$((req_offset + 4))"
+			fi
+			if [ "$tos_ttl" = "inherit" ]; then
+				expected_tos="0x00"
+				expected_ttl="64"
+			fi
+			out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \
+				-i veth0 -n                                   \
+				ip6[$tunnel_type_offset] = $tunnel_type_proto and \
+				ip6[$req_proto_offset] = 0x08 and             \
+				ip6[$((req_proto_offset + 1))] = 0x06 and     \
+				ip6[$req_offset] = 0x01 2>/dev/null           \
+				| head -n 1)"
+		fi
+	fi
+	kill -9 $ping_pid
+	wait $ping_pid 2>/dev/null || true
+	result="FAIL"
+	if [ "$outer" = "4" ]; then
+		captured_ttl="$(get_field "ttl" "$out")"
+		captured_tos="$(printf "0x%02x" "$(get_field "tos" "$out")")"
+		if [ "$captured_tos" = "$expected_tos" ] &&
+		   [ "$captured_ttl" = "$expected_ttl" ]; then
+			result="OK"
+		fi
+	elif [ "$outer" = "6" ]; then
+		captured_ttl="$(get_field "hlim" "$out")"
+		captured_tos="$(printf "0x%02x" "$(get_field "class" "$out")")"
+		if [ "$captured_tos" = "$expected_tos" ] &&
+		   [ "$captured_ttl" = "$expected_ttl" ]; then
+			result="OK"
+		fi
+	fi
+
+	printf "%7s │\n" "$result"
+	if [ "$result" = "FAIL" ]; then
+		failed=true
+		if [ "$captured_tos" != "$expected_tos" ]; then
+			printf "│%43s%27s │\n" \
+			"Expected TOS value: $expected_tos" \
+			"Captured TOS value: $captured_tos"
+		fi
+		if [ "$captured_ttl" != "$expected_ttl" ]; then
+			printf "│%43s%27s │\n" \
+			"Expected TTL value: $expected_ttl" \
+			"Captured TTL value: $captured_ttl"
+		fi
+		printf "│%71s│\n" " "
+	fi
+}
+
+cleanup() {
+	ip netns del "${NS0}" 2>/dev/null
+	ip netns del "${NS1}" 2>/dev/null
+}
+
+exit_handler() {
+	# Don't exit immediately if one of the intermediate commands fails.
+	# We might be called at the end of the script, when the network
+	# namespaces have already been deleted. So cleanup() may fail, but we
+	# still need to run until 'exit $ERR' or the script won't return the
+	# correct error code.
+	set +e
+
+	cleanup
+
+	exit $ERR
+}
+
+# Restore the default SIGINT handler (just in case) and exit.
+# The exit handler will take care of cleaning everything up.
+interrupted() {
+	trap - INT
+
+	exit $ERR
+}
+
+set -e
+trap exit_handler EXIT
+trap interrupted INT
+
+printf "┌────────┬───────┬───────┬──────────────┬"
+printf "──────────────┬───────┬────────┐\n"
+for type in gre vxlan geneve; do
+	if ! $(modprobe "$type" 2>/dev/null); then
+		continue
+	fi
+	for outer in 4 6; do
+		printf "├────────┼───────┼───────┼──────────────┼"
+		printf "──────────────┼───────┼────────┤\n"
+		printf "│  Type  │ outer | inner │     tos      │"
+		printf "      ttl     │  vlan │ result │\n"
+		for inner in 4 6 other; do
+			printf "├────────┼───────┼───────┼──────────────┼"
+			printf "──────────────┼───────┼────────┤\n"
+			for tos_ttl in inherit random; do
+				for vlan in false true; do
+					setup "$type" "$outer" "$inner" \
+					"$tos_ttl" "$vlan"
+					verify "$outer" "$inner" "$tos_ttl" \
+					"$vlan"
+					cleanup
+				done
+			done
+		done
+	done
+done
+printf "└────────┴───────┴───────┴──────────────┴"
+printf "──────────────┴───────┴────────┘\n"
+
+# All tests done.
+# Set ERR appropriately: it will be returned by the exit handler.
+if $failed; then
+	ERR=1
+else
+	ERR=0
+fi
diff --git a/tools/testing/selftests/net/l2tp.sh b/tools/testing/selftests/net/l2tp.sh
index 5782433886fc..88de7166c8ae 100755
--- a/tools/testing/selftests/net/l2tp.sh
+++ b/tools/testing/selftests/net/l2tp.sh
@@ -13,6 +13,7 @@
 #                10.1.1.1    |            |   10.1.2.1
 #              2001:db8:1::1 |            | 2001:db8:2::1
 
+source lib.sh
 VERBOSE=0
 PAUSE_ON_FAIL=no
 
@@ -80,9 +81,6 @@ create_ns()
 	[ -z "${addr}" ] && addr="-"
 	[ -z "${addr6}" ] && addr6="-"
 
-	ip netns add ${ns}
-
-	ip -netns ${ns} link set lo up
 	if [ "${addr}" != "-" ]; then
 		ip -netns ${ns} addr add dev lo ${addr}
 	fi
@@ -133,12 +131,7 @@ connect_ns()
 
 cleanup()
 {
-	local ns
-
-	for ns in host-1 host-2 router
-	do
-		ip netns del ${ns} 2>/dev/null
-	done
+	cleanup_ns $host_1 $host_2 $router
 }
 
 setup_l2tp_ipv4()
@@ -146,28 +139,28 @@ setup_l2tp_ipv4()
 	#
 	# configure l2tpv3 tunnel on host-1
 	#
-	ip -netns host-1 l2tp add tunnel tunnel_id 1041 peer_tunnel_id 1042 \
+	ip -netns $host_1 l2tp add tunnel tunnel_id 1041 peer_tunnel_id 1042 \
 			 encap ip local 10.1.1.1 remote 10.1.2.1
-	ip -netns host-1 l2tp add session name l2tp4 tunnel_id 1041 \
+	ip -netns $host_1 l2tp add session name l2tp4 tunnel_id 1041 \
 			 session_id 1041 peer_session_id 1042
-	ip -netns host-1 link set dev l2tp4 up
-	ip -netns host-1 addr add dev l2tp4 172.16.1.1 peer 172.16.1.2
+	ip -netns $host_1 link set dev l2tp4 up
+	ip -netns $host_1 addr add dev l2tp4 172.16.1.1 peer 172.16.1.2
 
 	#
 	# configure l2tpv3 tunnel on host-2
 	#
-	ip -netns host-2 l2tp add tunnel tunnel_id 1042 peer_tunnel_id 1041 \
+	ip -netns $host_2 l2tp add tunnel tunnel_id 1042 peer_tunnel_id 1041 \
 			 encap ip local 10.1.2.1 remote 10.1.1.1
-	ip -netns host-2 l2tp add session name l2tp4 tunnel_id 1042 \
+	ip -netns $host_2 l2tp add session name l2tp4 tunnel_id 1042 \
 			 session_id 1042 peer_session_id 1041
-	ip -netns host-2 link set dev l2tp4 up
-	ip -netns host-2 addr add dev l2tp4 172.16.1.2 peer 172.16.1.1
+	ip -netns $host_2 link set dev l2tp4 up
+	ip -netns $host_2 addr add dev l2tp4 172.16.1.2 peer 172.16.1.1
 
 	#
 	# add routes to loopback addresses
 	#
-	ip -netns host-1 ro add 172.16.101.2/32 via 172.16.1.2
-	ip -netns host-2 ro add 172.16.101.1/32 via 172.16.1.1
+	ip -netns $host_1 ro add 172.16.101.2/32 via 172.16.1.2
+	ip -netns $host_2 ro add 172.16.101.1/32 via 172.16.1.1
 }
 
 setup_l2tp_ipv6()
@@ -175,28 +168,28 @@ setup_l2tp_ipv6()
 	#
 	# configure l2tpv3 tunnel on host-1
 	#
-	ip -netns host-1 l2tp add tunnel tunnel_id 1061 peer_tunnel_id 1062 \
+	ip -netns $host_1 l2tp add tunnel tunnel_id 1061 peer_tunnel_id 1062 \
 			 encap ip local 2001:db8:1::1 remote 2001:db8:2::1
-	ip -netns host-1 l2tp add session name l2tp6 tunnel_id 1061 \
+	ip -netns $host_1 l2tp add session name l2tp6 tunnel_id 1061 \
 			 session_id 1061 peer_session_id 1062
-	ip -netns host-1 link set dev l2tp6 up
-	ip -netns host-1 addr add dev l2tp6 fc00:1::1 peer fc00:1::2
+	ip -netns $host_1 link set dev l2tp6 up
+	ip -netns $host_1 addr add dev l2tp6 fc00:1::1 peer fc00:1::2
 
 	#
 	# configure l2tpv3 tunnel on host-2
 	#
-	ip -netns host-2 l2tp add tunnel tunnel_id 1062 peer_tunnel_id 1061 \
+	ip -netns $host_2 l2tp add tunnel tunnel_id 1062 peer_tunnel_id 1061 \
 			 encap ip local 2001:db8:2::1 remote 2001:db8:1::1
-	ip -netns host-2 l2tp add session name l2tp6 tunnel_id 1062 \
+	ip -netns $host_2 l2tp add session name l2tp6 tunnel_id 1062 \
 			 session_id 1062 peer_session_id 1061
-	ip -netns host-2 link set dev l2tp6 up
-	ip -netns host-2 addr add dev l2tp6 fc00:1::2 peer fc00:1::1
+	ip -netns $host_2 link set dev l2tp6 up
+	ip -netns $host_2 addr add dev l2tp6 fc00:1::2 peer fc00:1::1
 
 	#
 	# add routes to loopback addresses
 	#
-	ip -netns host-1 -6 ro add fc00:101::2/128 via fc00:1::2
-	ip -netns host-2 -6 ro add fc00:101::1/128 via fc00:1::1
+	ip -netns $host_1 -6 ro add fc00:101::2/128 via fc00:1::2
+	ip -netns $host_2 -6 ro add fc00:101::1/128 via fc00:1::1
 }
 
 setup()
@@ -205,21 +198,22 @@ setup()
 	cleanup
 
 	set -e
-	create_ns host-1 172.16.101.1/32 fc00:101::1/128
-	create_ns host-2 172.16.101.2/32 fc00:101::2/128
-	create_ns router
+	setup_ns host_1 host_2 router
+	create_ns $host_1 172.16.101.1/32 fc00:101::1/128
+	create_ns $host_2 172.16.101.2/32 fc00:101::2/128
+	create_ns $router
 
-	connect_ns host-1 eth0 10.1.1.1/24 2001:db8:1::1/64 \
-	           router eth1 10.1.1.2/24 2001:db8:1::2/64
+	connect_ns $host_1 eth0 10.1.1.1/24 2001:db8:1::1/64 \
+	           $router eth1 10.1.1.2/24 2001:db8:1::2/64
 
-	connect_ns host-2 eth0 10.1.2.1/24 2001:db8:2::1/64 \
-	           router eth2 10.1.2.2/24 2001:db8:2::2/64
+	connect_ns $host_2 eth0 10.1.2.1/24 2001:db8:2::1/64 \
+	           $router eth2 10.1.2.2/24 2001:db8:2::2/64
 
-	ip -netns host-1 ro add 10.1.2.0/24 via 10.1.1.2
-	ip -netns host-1 -6 ro add 2001:db8:2::/64 via 2001:db8:1::2
+	ip -netns $host_1 ro add 10.1.2.0/24 via 10.1.1.2
+	ip -netns $host_1 -6 ro add 2001:db8:2::/64 via 2001:db8:1::2
 
-	ip -netns host-2 ro add 10.1.1.0/24 via 10.1.2.2
-	ip -netns host-2 -6 ro add 2001:db8:1::/64 via 2001:db8:2::2
+	ip -netns $host_2 ro add 10.1.1.0/24 via 10.1.2.2
+	ip -netns $host_2 -6 ro add 2001:db8:1::/64 via 2001:db8:2::2
 
 	setup_l2tp_ipv4
 	setup_l2tp_ipv6
@@ -231,38 +225,38 @@ setup_ipsec()
 	#
 	# IPv4
 	#
-	run_cmd host-1 ip xfrm policy add \
+	run_cmd $host_1 ip xfrm policy add \
 		src 10.1.1.1 dst 10.1.2.1 dir out \
 		tmpl proto esp mode transport
 
-	run_cmd host-1 ip xfrm policy add \
+	run_cmd $host_1 ip xfrm policy add \
 		src 10.1.2.1 dst 10.1.1.1 dir in \
 		tmpl proto esp mode transport
 
-	run_cmd host-2 ip xfrm policy add \
+	run_cmd $host_2 ip xfrm policy add \
 		src 10.1.1.1 dst 10.1.2.1 dir in \
 		tmpl proto esp mode transport
 
-	run_cmd host-2 ip xfrm policy add \
+	run_cmd $host_2 ip xfrm policy add \
 		src 10.1.2.1 dst 10.1.1.1 dir out \
 		tmpl proto esp mode transport
 
-	ip -netns host-1 xfrm state add \
+	ip -netns $host_1 xfrm state add \
 		src 10.1.1.1 dst 10.1.2.1 \
 		spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' \
 		0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport
 
-	ip -netns host-1 xfrm state add \
+	ip -netns $host_1 xfrm state add \
 		src 10.1.2.1 dst 10.1.1.1 \
 		spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' \
 		0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport
 
-	ip -netns host-2 xfrm state add \
+	ip -netns $host_2 xfrm state add \
 		src 10.1.1.1 dst 10.1.2.1 \
 		spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' \
 		0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport
 
-	ip -netns host-2 xfrm state add \
+	ip -netns $host_2 xfrm state add \
 		src 10.1.2.1 dst 10.1.1.1 \
 		spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' \
 		0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport
@@ -270,38 +264,38 @@ setup_ipsec()
 	#
 	# IPV6
 	#
-	run_cmd host-1 ip -6 xfrm policy add \
+	run_cmd $host_1 ip -6 xfrm policy add \
 		src 2001:db8:1::1 dst 2001:db8:2::1 dir out \
 		tmpl proto esp mode transport
 
-	run_cmd host-1 ip -6 xfrm policy add \
+	run_cmd $host_1 ip -6 xfrm policy add \
 		src 2001:db8:2::1 dst 2001:db8:1::1 dir in \
 		tmpl proto esp mode transport
 
-	run_cmd host-2 ip -6 xfrm policy add \
+	run_cmd $host_2 ip -6 xfrm policy add \
 		src 2001:db8:1::1 dst 2001:db8:2::1 dir in \
 		tmpl proto esp mode transport
 
-	run_cmd host-2 ip -6 xfrm policy add \
+	run_cmd $host_2 ip -6 xfrm policy add \
 		src 2001:db8:2::1 dst 2001:db8:1::1 dir out \
 		tmpl proto esp mode transport
 
-	ip -netns host-1 -6 xfrm state add \
+	ip -netns $host_1 -6 xfrm state add \
 		src 2001:db8:1::1 dst 2001:db8:2::1 \
 		spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' \
 		0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport
 
-	ip -netns host-1 -6 xfrm state add \
+	ip -netns $host_1 -6 xfrm state add \
 		src 2001:db8:2::1 dst 2001:db8:1::1 \
 		spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' \
 		0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport
 
-	ip -netns host-2 -6 xfrm state add \
+	ip -netns $host_2 -6 xfrm state add \
 		src 2001:db8:1::1 dst 2001:db8:2::1 \
 		spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' \
 		0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport
 
-	ip -netns host-2 -6 xfrm state add \
+	ip -netns $host_2 -6 xfrm state add \
 		src 2001:db8:2::1 dst 2001:db8:1::1 \
 		spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' \
 		0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport
@@ -309,10 +303,10 @@ setup_ipsec()
 
 teardown_ipsec()
 {
-	run_cmd host-1 ip xfrm state flush
-	run_cmd host-1 ip xfrm policy flush
-	run_cmd host-2 ip xfrm state flush
-	run_cmd host-2 ip xfrm policy flush
+	run_cmd $host_1 ip xfrm state flush
+	run_cmd $host_1 ip xfrm policy flush
+	run_cmd $host_2 ip xfrm state flush
+	run_cmd $host_2 ip xfrm policy flush
 }
 
 ################################################################################
@@ -322,16 +316,16 @@ run_ping()
 {
 	local desc="$1"
 
-	run_cmd host-1 ping -c1 -w1 172.16.1.2
+	run_cmd $host_1 ping -c1 -w1 172.16.1.2
 	log_test $? 0 "IPv4 basic L2TP tunnel ${desc}"
 
-	run_cmd host-1 ping -c1 -w1 -I 172.16.101.1 172.16.101.2
+	run_cmd $host_1 ping -c1 -w1 -I 172.16.101.1 172.16.101.2
 	log_test $? 0 "IPv4 route through L2TP tunnel ${desc}"
 
-	run_cmd host-1 ${ping6} -c1 -w1 fc00:1::2
+	run_cmd $host_1 ${ping6} -c1 -w1 fc00:1::2
 	log_test $? 0 "IPv6 basic L2TP tunnel ${desc}"
 
-	run_cmd host-1 ${ping6} -c1 -w1 -I fc00:101::1 fc00:101::2
+	run_cmd $host_1 ${ping6} -c1 -w1 -I fc00:101::1 fc00:101::2
 	log_test $? 0 "IPv6 route through L2TP tunnel ${desc}"
 }
 
@@ -344,16 +338,16 @@ run_tests()
 
 	setup_ipsec
 	run_ping "- with IPsec"
-	run_cmd host-1 ping -c1 -w1 172.16.1.2
+	run_cmd $host_1 ping -c1 -w1 172.16.1.2
 	log_test $? 0 "IPv4 basic L2TP tunnel ${desc}"
 
-	run_cmd host-1 ping -c1 -w1 -I 172.16.101.1 172.16.101.2
+	run_cmd $host_1 ping -c1 -w1 -I 172.16.101.1 172.16.101.2
 	log_test $? 0 "IPv4 route through L2TP tunnel ${desc}"
 
-	run_cmd host-1 ${ping6} -c1 -w1 fc00:1::2
+	run_cmd $host_1 ${ping6} -c1 -w1 fc00:1::2
 	log_test $? 0 "IPv6 basic L2TP tunnel - with IPsec"
 
-	run_cmd host-1 ${ping6} -c1 -w1 -I fc00:101::1 fc00:101::2
+	run_cmd $host_1 ${ping6} -c1 -w1 -I fc00:101::1 fc00:101::2
 	log_test $? 0 "IPv6 route through L2TP tunnel - with IPsec"
 
 	teardown_ipsec
diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh
new file mode 100644
index 000000000000..f9fe182dfbd4
--- /dev/null
+++ b/tools/testing/selftests/net/lib.sh
@@ -0,0 +1,96 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+##############################################################################
+# Defines
+
+WAIT_TIMEOUT=${WAIT_TIMEOUT:=20}
+BUSYWAIT_TIMEOUT=$((WAIT_TIMEOUT * 1000)) # ms
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+# namespace list created by setup_ns
+NS_LIST=""
+
+##############################################################################
+# Helpers
+busywait()
+{
+	local timeout=$1; shift
+
+	local start_time="$(date -u +%s%3N)"
+	while true
+	do
+		local out
+		out=$("$@")
+		local ret=$?
+		if ((!ret)); then
+			echo -n "$out"
+			return 0
+		fi
+
+		local current_time="$(date -u +%s%3N)"
+		if ((current_time - start_time > timeout)); then
+			echo -n "$out"
+			return 1
+		fi
+	done
+}
+
+cleanup_ns()
+{
+	local ns=""
+	local errexit=0
+	local ret=0
+
+	# disable errexit temporary
+	if [[ $- =~ "e" ]]; then
+		errexit=1
+		set +e
+	fi
+
+	for ns in "$@"; do
+		ip netns delete "${ns}" &> /dev/null
+		if ! busywait $BUSYWAIT_TIMEOUT ip netns list \| grep -vq "^$ns$" &> /dev/null; then
+			echo "Warn: Failed to remove namespace $ns"
+			ret=1
+		fi
+	done
+
+	[ $errexit -eq 1 ] && set -e
+	return $ret
+}
+
+cleanup_all_ns()
+{
+	cleanup_ns $NS_LIST
+}
+
+# setup netns with given names as prefix. e.g
+# setup_ns local remote
+setup_ns()
+{
+	local ns=""
+	local ns_name=""
+	local ns_list=""
+	for ns_name in "$@"; do
+		# Some test may setup/remove same netns multi times
+		if unset ${ns_name} 2> /dev/null; then
+			ns="${ns_name,,}-$(mktemp -u XXXXXX)"
+			eval readonly ${ns_name}="$ns"
+		else
+			eval ns='$'${ns_name}
+			cleanup_ns "$ns"
+
+		fi
+
+		if ! ip netns add "$ns"; then
+			echo "Failed to create namespace $ns_name"
+			cleanup_ns "$ns_list"
+			return $ksft_skip
+		fi
+		ip -n "$ns" link set lo up
+		ns_list="$ns_list $ns"
+	done
+	NS_LIST="$NS_LIST $ns_list"
+}
diff --git a/tools/testing/selftests/net/mptcp/.gitignore b/tools/testing/selftests/net/mptcp/.gitignore
index 260336d5f0b1..49daae73c41e 100644
--- a/tools/testing/selftests/net/mptcp/.gitignore
+++ b/tools/testing/selftests/net/mptcp/.gitignore
@@ -1,4 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 mptcp_connect
+mptcp_inq
+mptcp_sockopt
 pm_nl_ctl
 *.pcap
diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile
index 00bb158b4a5d..7b936a926859 100644
--- a/tools/testing/selftests/net/mptcp/Makefile
+++ b/tools/testing/selftests/net/mptcp/Makefile
@@ -1,16 +1,15 @@
 # SPDX-License-Identifier: GPL-2.0
 
 top_srcdir = ../../../../..
-KSFT_KHDR_INSTALL := 1
 
-CFLAGS =  -Wall -Wl,--no-as-needed -O2 -g  -I$(top_srcdir)/usr/include
+CFLAGS =  -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES)
 
 TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \
-	      simult_flows.sh
+	      simult_flows.sh mptcp_sockopt.sh userspace_pm.sh
 
-TEST_GEN_FILES = mptcp_connect pm_nl_ctl
+TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq
 
-TEST_FILES := settings
+TEST_FILES := mptcp_lib.sh settings
 
 EXTRA_CLEAN := *.pcap
 
diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config
index 741a1c4f4ae8..4f80014cae49 100644
--- a/tools/testing/selftests/net/mptcp/config
+++ b/tools/testing/selftests/net/mptcp/config
@@ -1,3 +1,4 @@
+CONFIG_KALLSYMS=y
 CONFIG_MPTCP=y
 CONFIG_IPV6=y
 CONFIG_MPTCP_IPV6=y
@@ -5,3 +6,29 @@ CONFIG_INET_DIAG=m
 CONFIG_INET_MPTCP_DIAG=m
 CONFIG_VETH=y
 CONFIG_NET_SCH_NETEM=m
+CONFIG_SYN_COOKIES=y
+CONFIG_NETFILTER=y
+CONFIG_NETFILTER_ADVANCED=y
+CONFIG_NETFILTER_NETLINK=m
+CONFIG_NF_TABLES=m
+CONFIG_NFT_COMPAT=m
+CONFIG_NETFILTER_XTABLES=m
+CONFIG_NETFILTER_XT_MATCH_BPF=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
+CONFIG_NETFILTER_XT_TARGET_MARK=m
+CONFIG_NF_TABLES_INET=y
+CONFIG_NFT_TPROXY=m
+CONFIG_NFT_SOCKET=m
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_IP6_NF_FILTER=m
+CONFIG_NET_ACT_CSUM=m
+CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_CLS_FW=m
+CONFIG_NET_SCH_INGRESS=m
diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh
index 39edce4f541c..bc97ab33a00e 100755
--- a/tools/testing/selftests/net/mptcp/diag.sh
+++ b/tools/testing/selftests/net/mptcp/diag.sh
@@ -1,12 +1,17 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
-ns="ns1-$rndh"
-ksft_skip=4
-test_cnt=1
+# Double quotes to prevent globbing and word splitting is recommended in new
+# code but we accept it, especially because there were too many before having
+# address all other issues detected by shellcheck.
+#shellcheck disable=SC2086
+
+. "$(dirname "${0}")/mptcp_lib.sh"
+
+ns=""
+timeout_poll=30
+timeout_test=$((timeout_poll * 2 + 1))
 ret=0
-pids=()
 
 flush_pids()
 {
@@ -14,108 +19,290 @@ flush_pids()
 	# give it some time
 	sleep 1.1
 
-	for pid in ${pids[@]}; do
-		[ -d /proc/$pid ] && kill -SIGUSR1 $pid >/dev/null 2>&1
+	ip netns pids "${ns}" | xargs --no-run-if-empty kill -SIGUSR1 &>/dev/null
+
+	for _ in $(seq $((timeout_poll * 10))); do
+		[ -z "$(ip netns pids "${ns}")" ] && break
+		sleep 0.1
 	done
-	pids=()
 }
 
+# This function is used in the cleanup trap
+#shellcheck disable=SC2317
 cleanup()
 {
-	ip netns del $ns
-	for pid in ${pids[@]}; do
-		[ -d /proc/$pid ] && kill -9 $pid >/dev/null 2>&1
-	done
+	ip netns pids "${ns}" | xargs --no-run-if-empty kill -SIGKILL &>/dev/null
+
+	mptcp_lib_ns_exit "${ns}"
 }
 
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
-	echo "SKIP: Could not run test without ip tool"
-	exit $ksft_skip
-fi
-ss -h | grep -q MPTCP
-if [ $? -ne 0 ];then
-	echo "SKIP: ss tool does not support MPTCP"
-	exit $ksft_skip
-fi
+mptcp_lib_check_mptcp
+mptcp_lib_check_tools ip ss
+
+get_msk_inuse()
+{
+	ip netns exec $ns cat /proc/net/protocols | awk '$1~/^MPTCP$/{print $3}'
+}
 
 __chk_nr()
 {
-	local condition="$1"
+	local command="$1"
 	local expected=$2
-	local msg nr
+	local msg="$3"
+	local skip="${4-SKIP}"
+	local nr
 
-	shift 2
-	msg=$*
-	nr=$(ss -inmHMN $ns | $condition)
+	nr=$(eval $command)
 
-	printf "%-50s" "$msg"
-	if [ $nr != $expected ]; then
-		echo "[ fail ] expected $expected found $nr"
-		ret=$test_cnt
+	mptcp_lib_print_title "$msg"
+	if [ "$nr" != "$expected" ]; then
+		if [ "$nr" = "$skip" ] && ! mptcp_lib_expect_all_features; then
+			mptcp_lib_pr_skip "Feature probably not supported"
+			mptcp_lib_result_skip "${msg}"
+		else
+			mptcp_lib_pr_fail "expected $expected found $nr"
+			mptcp_lib_result_fail "${msg}"
+			ret=${KSFT_FAIL}
+		fi
 	else
-		echo "[  ok  ]"
+		mptcp_lib_pr_ok
+		mptcp_lib_result_pass "${msg}"
 	fi
-	test_cnt=$((test_cnt+1))
+}
+
+__chk_msk_nr()
+{
+	local condition=$1
+	shift 1
+
+	__chk_nr "ss -inmHMN $ns | $condition" "$@"
 }
 
 chk_msk_nr()
 {
-	__chk_nr "grep -c token:" $*
+	__chk_msk_nr "grep -c token:" "$@"
+}
+
+chk_listener_nr()
+{
+	local expected=$1
+	local msg="$2"
+
+	__chk_nr "ss -nlHMON $ns | wc -l" "$expected" "$msg - mptcp" 0
+	__chk_nr "ss -nlHtON $ns | wc -l" "$expected" "$msg - subflows"
+}
+
+wait_msk_nr()
+{
+	local condition="grep -c token:"
+	local expected=$1
+	local timeout=20
+	local msg nr
+	local max=0
+	local i=0
+
+	shift 1
+	msg=$*
+
+	while [ $i -lt $timeout ]; do
+		nr=$(ss -inmHMN $ns | $condition)
+		[ $nr == $expected ] && break;
+		[ $nr -gt $max ] && max=$nr
+		i=$((i + 1))
+		sleep 1
+	done
+
+	mptcp_lib_print_title "$msg"
+	if [ $i -ge $timeout ]; then
+		mptcp_lib_pr_fail "timeout while expecting $expected max $max last $nr"
+		mptcp_lib_result_fail "${msg} # timeout"
+		ret=${KSFT_FAIL}
+	elif [ $nr != $expected ]; then
+		mptcp_lib_pr_fail "expected $expected found $nr"
+		mptcp_lib_result_fail "${msg} # unexpected result"
+		ret=${KSFT_FAIL}
+	else
+		mptcp_lib_pr_ok
+		mptcp_lib_result_pass "${msg}"
+	fi
 }
 
 chk_msk_fallback_nr()
 {
-		__chk_nr "grep -c fallback" $*
+	__chk_msk_nr "grep -c fallback" "$@"
 }
 
 chk_msk_remote_key_nr()
 {
-		__chk_nr "grep -c remote_key" $*
+	__chk_msk_nr "grep -c remote_key" "$@"
+}
+
+__chk_listen()
+{
+	local filter="$1"
+	local expected=$2
+	local msg="$3"
+
+	__chk_nr "ss -N $ns -Ml '$filter' | grep -c LISTEN" "$expected" "$msg" 0
+}
+
+chk_msk_listen()
+{
+	lport=$1
+
+	# destination port search should always return empty list
+	__chk_listen "dport $lport" 0 "listen match for dport $lport"
+
+	# should return 'our' mptcp listen socket
+	__chk_listen "sport $lport" 1 "listen match for sport $lport"
+
+	__chk_listen "src inet:0.0.0.0:$lport" 1 "listen match for saddr and sport"
+
+	__chk_listen "" 1 "all listen sockets"
+
+	nr=$(ss -Ml $filter | wc -l)
+}
+
+chk_msk_inuse()
+{
+	local expected=$1
+	local msg="....chk ${2:-${expected}} msk in use"
+	local listen_nr
+
+	if [ "${expected}" -eq 0 ]; then
+		msg+=" after flush"
+	fi
+
+	listen_nr=$(ss -N "${ns}" -Ml | grep -c LISTEN)
+	expected=$((expected + listen_nr))
+
+	for _ in $(seq 10); do
+		if [ "$(get_msk_inuse)" -eq $expected ]; then
+			break
+		fi
+		sleep 0.1
+	done
+
+	__chk_nr get_msk_inuse $expected "${msg}" 0
 }
 
+# $1: cestab nr
+chk_msk_cestab()
+{
+	local expected=$1
+	local msg="....chk ${2:-${expected}} cestab"
+
+	if [ "${expected}" -eq 0 ]; then
+		msg+=" after flush"
+	fi
+
+	__chk_nr "mptcp_lib_get_counter ${ns} MPTcpExtMPCurrEstab" \
+		 "${expected}" "${msg}" ""
+}
+
+wait_connected()
+{
+	local listener_ns="${1}"
+	local port="${2}"
+
+	local port_hex i
+
+	port_hex="$(printf "%04X" "${port}")"
+	for i in $(seq 10); do
+		ip netns exec ${listener_ns} grep -q " 0100007F:${port_hex} " /proc/net/tcp && break
+		sleep 0.1
+	done
+}
 
 trap cleanup EXIT
-ip netns add $ns
-ip -n $ns link set dev lo up
+mptcp_lib_ns_init ns
 
-echo "a" | ip netns exec $ns ./mptcp_connect -p 10000 -l 0.0.0.0 -t 100 >/dev/null &
-sleep 0.1
-pids[0]=$!
+echo "a" | \
+	timeout ${timeout_test} \
+		ip netns exec $ns \
+			./mptcp_connect -p 10000 -l -t ${timeout_poll} -w 20 \
+				0.0.0.0 >/dev/null &
+mptcp_lib_wait_local_port_listen $ns 10000
 chk_msk_nr 0 "no msk on netns creation"
+chk_msk_listen 10000
 
-echo "b" | ip netns exec $ns ./mptcp_connect -p 10000 127.0.0.1 -j -t 100 >/dev/null &
-sleep 0.1
-pids[1]=$!
+echo "b" | \
+	timeout ${timeout_test} \
+		ip netns exec $ns \
+			./mptcp_connect -p 10000 -r 0 -t ${timeout_poll} -w 20 \
+				127.0.0.1 >/dev/null &
+wait_connected $ns 10000
 chk_msk_nr 2 "after MPC handshake "
 chk_msk_remote_key_nr 2 "....chk remote_key"
 chk_msk_fallback_nr 0 "....chk no fallback"
+chk_msk_inuse 2
+chk_msk_cestab 2
 flush_pids
 
+chk_msk_inuse 0 "2->0"
+chk_msk_cestab 0 "2->0"
 
-echo "a" | ip netns exec $ns ./mptcp_connect -p 10001 -s TCP -l 0.0.0.0 -t 100 >/dev/null &
-pids[0]=$!
-sleep 0.1
-echo "b" | ip netns exec $ns ./mptcp_connect -p 10001 127.0.0.1 -j -t 100 >/dev/null &
-pids[1]=$!
-sleep 0.1
+echo "a" | \
+	timeout ${timeout_test} \
+		ip netns exec $ns \
+			./mptcp_connect -p 10001 -l -s TCP -t ${timeout_poll} -w 20 \
+				0.0.0.0 >/dev/null &
+mptcp_lib_wait_local_port_listen $ns 10001
+echo "b" | \
+	timeout ${timeout_test} \
+		ip netns exec $ns \
+			./mptcp_connect -p 10001 -r 0 -t ${timeout_poll} -w 20 \
+				127.0.0.1 >/dev/null &
+wait_connected $ns 10001
 chk_msk_fallback_nr 1 "check fallback"
+chk_msk_inuse 1
+chk_msk_cestab 1
 flush_pids
 
+chk_msk_inuse 0 "1->0"
+chk_msk_cestab 0 "1->0"
+
 NR_CLIENTS=100
-for I in `seq 1 $NR_CLIENTS`; do
-	echo "a" | ip netns exec $ns ./mptcp_connect -p $((I+10001)) -l 0.0.0.0 -t 100 -w 10 >/dev/null  &
-	pids[$((I*2))]=$!
+for I in $(seq 1 $NR_CLIENTS); do
+	echo "a" | \
+		timeout ${timeout_test} \
+			ip netns exec $ns \
+				./mptcp_connect -p $((I+10001)) -l -w 20 \
+					-t ${timeout_poll} 0.0.0.0 >/dev/null &
+done
+mptcp_lib_wait_local_port_listen $ns $((NR_CLIENTS + 10001))
+
+for I in $(seq 1 $NR_CLIENTS); do
+	echo "b" | \
+		timeout ${timeout_test} \
+			ip netns exec $ns \
+				./mptcp_connect -p $((I+10001)) -w 20 \
+					-t ${timeout_poll} 127.0.0.1 >/dev/null &
 done
-sleep 0.1
 
-for I in `seq 1 $NR_CLIENTS`; do
-	echo "b" | ip netns exec $ns ./mptcp_connect -p $((I+10001)) 127.0.0.1 -t 100 -w 10 >/dev/null &
-	pids[$((I*2 + 1))]=$!
+wait_msk_nr $((NR_CLIENTS*2)) "many msk socket present"
+chk_msk_inuse $((NR_CLIENTS*2)) "many"
+chk_msk_cestab $((NR_CLIENTS*2)) "many"
+flush_pids
+
+chk_msk_inuse 0 "many->0"
+chk_msk_cestab 0 "many->0"
+
+chk_listener_nr 0 "no listener sockets"
+NR_SERVERS=100
+for I in $(seq 1 $NR_SERVERS); do
+	ip netns exec $ns ./mptcp_connect -p $((I + 20001)) \
+		-t ${timeout_poll} -l 0.0.0.0 >/dev/null 2>&1 &
 done
-sleep 1.5
+mptcp_lib_wait_local_port_listen $ns $((NR_SERVERS + 20001))
 
-chk_msk_nr $((NR_CLIENTS*2)) "many msk socket present"
+chk_listener_nr $NR_SERVERS "many listener sockets"
+
+# graceful termination
+for I in $(seq 1 $NR_SERVERS); do
+	echo a | ip netns exec $ns ./mptcp_connect -p $((I + 20001)) 127.0.0.1 >/dev/null 2>&1 &
+done
 flush_pids
 
+mptcp_lib_result_print_all_tap
 exit $ret
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index 77bb62feb872..d2043ec3bf6d 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -6,6 +6,7 @@
 #include <limits.h>
 #include <fcntl.h>
 #include <string.h>
+#include <stdarg.h>
 #include <stdbool.h>
 #include <stdint.h>
 #include <stdio.h>
@@ -13,8 +14,11 @@
 #include <strings.h>
 #include <signal.h>
 #include <unistd.h>
+#include <time.h>
 
+#include <sys/ioctl.h>
 #include <sys/poll.h>
+#include <sys/random.h>
 #include <sys/sendfile.h>
 #include <sys/stat.h>
 #include <sys/socket.h>
@@ -25,6 +29,8 @@
 #include <netinet/in.h>
 
 #include <linux/tcp.h>
+#include <linux/time_types.h>
+#include <linux/sockios.h>
 
 extern int optind;
 
@@ -45,34 +51,103 @@ enum cfg_mode {
 	CFG_MODE_SENDFILE,
 };
 
+enum cfg_peek {
+	CFG_NONE_PEEK,
+	CFG_WITH_PEEK,
+	CFG_AFTER_PEEK,
+};
+
 static enum cfg_mode cfg_mode = CFG_MODE_POLL;
+static enum cfg_peek cfg_peek = CFG_NONE_PEEK;
 static const char *cfg_host;
 static const char *cfg_port	= "12000";
 static int cfg_sock_proto	= IPPROTO_MPTCP;
-static bool tcpulp_audit;
 static int pf = AF_INET;
 static int cfg_sndbuf;
 static int cfg_rcvbuf;
 static bool cfg_join;
 static bool cfg_remove;
+static unsigned int cfg_time;
+static unsigned int cfg_do_w;
 static int cfg_wait;
+static uint32_t cfg_mark;
+static char *cfg_input;
+static int cfg_repeat = 1;
+static int cfg_truncate;
+static int cfg_rcv_trunc;
+
+struct cfg_cmsg_types {
+	unsigned int cmsg_enabled:1;
+	unsigned int timestampns:1;
+	unsigned int tcp_inq:1;
+};
+
+struct cfg_sockopt_types {
+	unsigned int transparent:1;
+	unsigned int mptfo:1;
+};
+
+struct tcp_inq_state {
+	unsigned int last;
+	bool expect_eof;
+};
+
+struct wstate {
+	char buf[8192];
+	unsigned int len;
+	unsigned int off;
+	unsigned int total_len;
+};
+
+static struct tcp_inq_state tcp_inq;
+
+static struct cfg_cmsg_types cfg_cmsg_types;
+static struct cfg_sockopt_types cfg_sockopt_types;
 
 static void die_usage(void)
 {
-	fprintf(stderr, "Usage: mptcp_connect [-6] [-u] [-s MPTCP|TCP] [-p port] [-m mode]"
-		"[-l] [-w sec] connect_address\n");
+	fprintf(stderr, "Usage: mptcp_connect [-6] [-c cmsg] [-f offset] [-i file] [-I num] [-j] [-l] "
+		"[-m mode] [-M mark] [-o option] [-p port] [-P mode] [-r num] [-R num] "
+		"[-s MPTCP|TCP] [-S num] [-t num] [-T num] [-w sec] connect_address\n");
 	fprintf(stderr, "\t-6 use ipv6\n");
-	fprintf(stderr, "\t-t num -- set poll timeout to num\n");
-	fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n");
-	fprintf(stderr, "\t-R num -- set SO_RCVBUF to num\n");
+	fprintf(stderr, "\t-c cmsg -- test cmsg type <cmsg>\n");
+	fprintf(stderr, "\t-f offset -- stop the I/O after receiving and sending the specified amount "
+		"of bytes. If there are unread bytes in the receive queue, that will cause a MPTCP "
+		"fastclose at close/shutdown. If offset is negative, expect the peer to close before "
+		"all the local data as been sent, thus toleration errors on write and EPIPE signals\n");
+	fprintf(stderr, "\t-i file -- read the data to send from the given file instead of stdin");
+	fprintf(stderr, "\t-I num -- repeat the transfer 'num' times. In listen mode accepts num "
+		"incoming connections, in client mode, disconnect and reconnect to the server\n");
+	fprintf(stderr, "\t-j     -- add additional sleep at connection start and tear down "
+		"-- for MPJ tests\n");
+	fprintf(stderr, "\t-l     -- listens mode, accepts incoming connection\n");
+	fprintf(stderr, "\t-m [poll|mmap|sendfile] -- use poll(default)/mmap+write/sendfile\n");
+	fprintf(stderr, "\t-M mark -- set socket packet mark\n");
+	fprintf(stderr, "\t-o option -- test sockopt <option>\n");
 	fprintf(stderr, "\t-p num -- use port num\n");
+	fprintf(stderr,
+		"\t-P [saveWithPeek|saveAfterPeek] -- save data with/after MSG_PEEK form tcp socket\n");
+	fprintf(stderr, "\t-r num -- enable slow mode, limiting each write to num bytes "
+		"-- for remove addr tests\n");
+	fprintf(stderr, "\t-R num -- set SO_RCVBUF to num\n");
 	fprintf(stderr, "\t-s [MPTCP|TCP] -- use mptcp(default) or tcp sockets\n");
-	fprintf(stderr, "\t-m [poll|mmap|sendfile] -- use poll(default)/mmap+write/sendfile\n");
-	fprintf(stderr, "\t-u -- check mptcp ulp\n");
+	fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n");
+	fprintf(stderr, "\t-t num -- set poll timeout to num\n");
+	fprintf(stderr, "\t-T num -- set expected runtime to num ms\n");
 	fprintf(stderr, "\t-w num -- wait num sec before closing the socket\n");
 	exit(1);
 }
 
+static void xerror(const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+	exit(1);
+}
+
 static void handle_signal(int nr)
 {
 	quit = true;
@@ -139,10 +214,81 @@ static void set_sndbuf(int fd, unsigned int size)
 	}
 }
 
+static void set_mark(int fd, uint32_t mark)
+{
+	int err;
+
+	err = setsockopt(fd, SOL_SOCKET, SO_MARK, &mark, sizeof(mark));
+	if (err) {
+		perror("set SO_MARK");
+		exit(1);
+	}
+}
+
+static void set_transparent(int fd, int pf)
+{
+	int one = 1;
+
+	switch (pf) {
+	case AF_INET:
+		if (-1 == setsockopt(fd, SOL_IP, IP_TRANSPARENT, &one, sizeof(one)))
+			perror("IP_TRANSPARENT");
+		break;
+	case AF_INET6:
+		if (-1 == setsockopt(fd, IPPROTO_IPV6, IPV6_TRANSPARENT, &one, sizeof(one)))
+			perror("IPV6_TRANSPARENT");
+		break;
+	}
+}
+
+static void set_mptfo(int fd, int pf)
+{
+	int qlen = 25;
+
+	if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)) == -1)
+		perror("TCP_FASTOPEN");
+}
+
+static int do_ulp_so(int sock, const char *name)
+{
+	return setsockopt(sock, IPPROTO_TCP, TCP_ULP, name, strlen(name));
+}
+
+#define X(m)	xerror("%s:%u: %s: failed for proto %d at line %u", __FILE__, __LINE__, (m), proto, line)
+static void sock_test_tcpulp(int sock, int proto, unsigned int line)
+{
+	socklen_t buflen = 8;
+	char buf[8] = "";
+	int ret = getsockopt(sock, IPPROTO_TCP, TCP_ULP, buf, &buflen);
+
+	if (ret != 0)
+		X("getsockopt");
+
+	if (buflen > 0) {
+		if (strcmp(buf, "mptcp") != 0)
+			xerror("unexpected ULP '%s' for proto %d at line %u", buf, proto, line);
+		ret = do_ulp_so(sock, "tls");
+		if (ret == 0)
+			X("setsockopt");
+	} else if (proto == IPPROTO_MPTCP) {
+		ret = do_ulp_so(sock, "tls");
+		if (ret != -1)
+			X("setsockopt");
+	}
+
+	ret = do_ulp_so(sock, "mptcp");
+	if (ret != -1)
+		X("setsockopt");
+
+#undef X
+}
+
+#define SOCK_TEST_TCPULP(s, p) sock_test_tcpulp((s), (p), __LINE__)
+
 static int sock_listen_mptcp(const char * const listenaddr,
 			     const char * const port)
 {
-	int sock;
+	int sock = -1;
 	struct addrinfo hints = {
 		.ai_protocol = IPPROTO_TCP,
 		.ai_socktype = SOCK_STREAM,
@@ -162,10 +308,18 @@ static int sock_listen_mptcp(const char * const listenaddr,
 		if (sock < 0)
 			continue;
 
+		SOCK_TEST_TCPULP(sock, cfg_sock_proto);
+
 		if (-1 == setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &one,
 				     sizeof(one)))
 			perror("setsockopt");
 
+		if (cfg_sockopt_types.transparent)
+			set_transparent(sock, pf);
+
+		if (cfg_sockopt_types.mptfo)
+			set_mptfo(sock, pf);
+
 		if (bind(sock, a->ai_addr, a->ai_addrlen) == 0)
 			break; /* success */
 
@@ -181,60 +335,30 @@ static int sock_listen_mptcp(const char * const listenaddr,
 		return sock;
 	}
 
+	SOCK_TEST_TCPULP(sock, cfg_sock_proto);
+
 	if (listen(sock, 20)) {
 		perror("listen");
 		close(sock);
 		return -1;
 	}
 
-	return sock;
-}
+	SOCK_TEST_TCPULP(sock, cfg_sock_proto);
 
-static bool sock_test_tcpulp(const char * const remoteaddr,
-			     const char * const port)
-{
-	struct addrinfo hints = {
-		.ai_protocol = IPPROTO_TCP,
-		.ai_socktype = SOCK_STREAM,
-	};
-	struct addrinfo *a, *addr;
-	int sock = -1, ret = 0;
-	bool test_pass = false;
-
-	hints.ai_family = AF_INET;
-
-	xgetaddrinfo(remoteaddr, port, &hints, &addr);
-	for (a = addr; a; a = a->ai_next) {
-		sock = socket(a->ai_family, a->ai_socktype, IPPROTO_TCP);
-		if (sock < 0) {
-			perror("socket");
-			continue;
-		}
-		ret = setsockopt(sock, IPPROTO_TCP, TCP_ULP, "mptcp",
-				 sizeof("mptcp"));
-		if (ret == -1 && errno == EOPNOTSUPP)
-			test_pass = true;
-		close(sock);
-
-		if (test_pass)
-			break;
-		if (!ret)
-			fprintf(stderr,
-				"setsockopt(TCP_ULP) returned 0\n");
-		else
-			perror("setsockopt(TCP_ULP)");
-	}
-	return test_pass;
+	return sock;
 }
 
 static int sock_connect_mptcp(const char * const remoteaddr,
-			      const char * const port, int proto)
+			      const char * const port, int proto,
+			      struct addrinfo **peer,
+			      int infd, struct wstate *winfo)
 {
 	struct addrinfo hints = {
 		.ai_protocol = IPPROTO_TCP,
 		.ai_socktype = SOCK_STREAM,
 	};
 	struct addrinfo *a, *addr;
+	int syn_copied = 0;
 	int sock = -1;
 
 	hints.ai_family = pf;
@@ -247,15 +371,44 @@ static int sock_connect_mptcp(const char * const remoteaddr,
 			continue;
 		}
 
-		if (connect(sock, a->ai_addr, a->ai_addrlen) == 0)
-			break; /* success */
+		SOCK_TEST_TCPULP(sock, proto);
 
-		perror("connect()");
-		close(sock);
-		sock = -1;
+		if (cfg_mark)
+			set_mark(sock, cfg_mark);
+
+		if (cfg_sockopt_types.mptfo) {
+			if (!winfo->total_len)
+				winfo->total_len = winfo->len = read(infd, winfo->buf,
+								     sizeof(winfo->buf));
+
+			syn_copied = sendto(sock, winfo->buf, winfo->len, MSG_FASTOPEN,
+					    a->ai_addr, a->ai_addrlen);
+			if (syn_copied >= 0) {
+				winfo->off = syn_copied;
+				winfo->len -= syn_copied;
+				*peer = a;
+				break; /* success */
+			}
+		} else {
+			if (connect(sock, a->ai_addr, a->ai_addrlen) == 0) {
+				*peer = a;
+				break; /* success */
+			}
+		}
+		if (cfg_sockopt_types.mptfo) {
+			perror("sendto()");
+			close(sock);
+			sock = -1;
+		} else {
+			perror("connect()");
+			close(sock);
+			sock = -1;
+		}
 	}
 
 	freeaddrinfo(addr);
+	if (sock != -1)
+		SOCK_TEST_TCPULP(sock, proto);
 	return sock;
 }
 
@@ -272,12 +425,12 @@ static size_t do_rnd_write(const int fd, char *buf, const size_t len)
 	if (cfg_join && first && do_w > 100)
 		do_w = 100;
 
-	if (cfg_remove && do_w > 50)
-		do_w = 50;
+	if (cfg_remove && do_w > cfg_do_w)
+		do_w = cfg_do_w;
 
 	bw = write(fd, buf, do_w);
 	if (bw < 0)
-		perror("write");
+		return bw;
 
 	/* let the join handshake complete, before going on */
 	if (cfg_join && first) {
@@ -312,8 +465,105 @@ static size_t do_write(const int fd, char *buf, const size_t len)
 	return offset;
 }
 
+static void process_cmsg(struct msghdr *msgh)
+{
+	struct __kernel_timespec ts;
+	bool inq_found = false;
+	bool ts_found = false;
+	unsigned int inq = 0;
+	struct cmsghdr *cmsg;
+
+	for (cmsg = CMSG_FIRSTHDR(msgh); cmsg ; cmsg = CMSG_NXTHDR(msgh, cmsg)) {
+		if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SO_TIMESTAMPNS_NEW) {
+			memcpy(&ts, CMSG_DATA(cmsg), sizeof(ts));
+			ts_found = true;
+			continue;
+		}
+		if (cmsg->cmsg_level == IPPROTO_TCP && cmsg->cmsg_type == TCP_CM_INQ) {
+			memcpy(&inq, CMSG_DATA(cmsg), sizeof(inq));
+			inq_found = true;
+			continue;
+		}
+
+	}
+
+	if (cfg_cmsg_types.timestampns) {
+		if (!ts_found)
+			xerror("TIMESTAMPNS not present\n");
+	}
+
+	if (cfg_cmsg_types.tcp_inq) {
+		if (!inq_found)
+			xerror("TCP_INQ not present\n");
+
+		if (inq > 1024)
+			xerror("tcp_inq %u is larger than one kbyte\n", inq);
+		tcp_inq.last = inq;
+	}
+}
+
+static ssize_t do_recvmsg_cmsg(const int fd, char *buf, const size_t len)
+{
+	char msg_buf[8192];
+	struct iovec iov = {
+		.iov_base = buf,
+		.iov_len = len,
+	};
+	struct msghdr msg = {
+		.msg_iov = &iov,
+		.msg_iovlen = 1,
+		.msg_control = msg_buf,
+		.msg_controllen = sizeof(msg_buf),
+	};
+	int flags = 0;
+	unsigned int last_hint = tcp_inq.last;
+	int ret = recvmsg(fd, &msg, flags);
+
+	if (ret <= 0) {
+		if (ret == 0 && tcp_inq.expect_eof)
+			return ret;
+
+		if (ret == 0 && cfg_cmsg_types.tcp_inq)
+			if (last_hint != 1 && last_hint != 0)
+				xerror("EOF but last tcp_inq hint was %u\n", last_hint);
+
+		return ret;
+	}
+
+	if (tcp_inq.expect_eof)
+		xerror("expected EOF, last_hint %u, now %u\n",
+		       last_hint, tcp_inq.last);
+
+	if (msg.msg_controllen && !cfg_cmsg_types.cmsg_enabled)
+		xerror("got %lu bytes of cmsg data, expected 0\n",
+		       (unsigned long)msg.msg_controllen);
+
+	if (msg.msg_controllen == 0 && cfg_cmsg_types.cmsg_enabled)
+		xerror("%s\n", "got no cmsg data");
+
+	if (msg.msg_controllen)
+		process_cmsg(&msg);
+
+	if (cfg_cmsg_types.tcp_inq) {
+		if ((size_t)ret < len && last_hint > (unsigned int)ret) {
+			if (ret + 1 != (int)last_hint) {
+				int next = read(fd, msg_buf, sizeof(msg_buf));
+
+				xerror("read %u of %u, last_hint was %u tcp_inq hint now %u next_read returned %d/%m\n",
+				       ret, (unsigned int)len, last_hint, tcp_inq.last, next);
+			} else {
+				tcp_inq.expect_eof = true;
+			}
+		}
+	}
+
+	return ret;
+}
+
 static ssize_t do_rnd_read(const int fd, char *buf, const size_t len)
 {
+	int ret = 0;
+	char tmp[16384];
 	size_t cap = rand();
 
 	cap &= 0xffff;
@@ -323,35 +573,62 @@ static ssize_t do_rnd_read(const int fd, char *buf, const size_t len)
 	else if (cap > len)
 		cap = len;
 
-	return read(fd, buf, cap);
+	if (cfg_peek == CFG_WITH_PEEK) {
+		ret = recv(fd, buf, cap, MSG_PEEK);
+		ret = (ret < 0) ? ret : read(fd, tmp, ret);
+	} else if (cfg_peek == CFG_AFTER_PEEK) {
+		ret = recv(fd, buf, cap, MSG_PEEK);
+		ret = (ret < 0) ? ret : read(fd, buf, cap);
+	} else if (cfg_cmsg_types.cmsg_enabled) {
+		ret = do_recvmsg_cmsg(fd, buf, cap);
+	} else {
+		ret = read(fd, buf, cap);
+	}
+
+	return ret;
 }
 
-static void set_nonblock(int fd)
+static void set_nonblock(int fd, bool nonblock)
 {
 	int flags = fcntl(fd, F_GETFL);
 
 	if (flags == -1)
 		return;
 
-	fcntl(fd, F_SETFL, flags | O_NONBLOCK);
+	if (nonblock)
+		fcntl(fd, F_SETFL, flags | O_NONBLOCK);
+	else
+		fcntl(fd, F_SETFL, flags & ~O_NONBLOCK);
+}
+
+static void shut_wr(int fd)
+{
+	/* Close our write side, ev. give some time
+	 * for address notification and/or checking
+	 * the current status
+	 */
+	if (cfg_wait)
+		usleep(cfg_wait);
+
+	shutdown(fd, SHUT_WR);
 }
 
-static int copyfd_io_poll(int infd, int peerfd, int outfd)
+static int copyfd_io_poll(int infd, int peerfd, int outfd,
+			  bool *in_closed_after_out, struct wstate *winfo)
 {
 	struct pollfd fds = {
 		.fd = peerfd,
 		.events = POLLIN | POLLOUT,
 	};
-	unsigned int woff = 0, wlen = 0;
-	char wbuf[8192];
+	unsigned int total_wlen = 0, total_rlen = 0;
 
-	set_nonblock(peerfd);
+	set_nonblock(peerfd, true);
 
 	for (;;) {
 		char rbuf[8192];
 		ssize_t len;
 
-		if (fds.events == 0)
+		if (fds.events == 0 || quit)
 			break;
 
 		switch (poll(&fds, 1, poll_timeout)) {
@@ -368,42 +645,65 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd)
 		}
 
 		if (fds.revents & POLLIN) {
-			len = do_rnd_read(peerfd, rbuf, sizeof(rbuf));
+			ssize_t rb = sizeof(rbuf);
+
+			/* limit the total amount of read data to the trunc value*/
+			if (cfg_truncate > 0) {
+				if (rb + total_rlen > cfg_truncate)
+					rb = cfg_truncate - total_rlen;
+				len = read(peerfd, rbuf, rb);
+			} else {
+				len = do_rnd_read(peerfd, rbuf, sizeof(rbuf));
+			}
 			if (len == 0) {
 				/* no more data to receive:
 				 * peer has closed its write side
 				 */
 				fds.events &= ~POLLIN;
 
-				if ((fds.events & POLLOUT) == 0)
+				if ((fds.events & POLLOUT) == 0) {
+					*in_closed_after_out = true;
 					/* and nothing more to send */
 					break;
+				}
 
 			/* Else, still have data to transmit */
 			} else if (len < 0) {
+				if (cfg_rcv_trunc)
+					return 0;
 				perror("read");
 				return 3;
 			}
 
+			total_rlen += len;
 			do_write(outfd, rbuf, len);
 		}
 
 		if (fds.revents & POLLOUT) {
-			if (wlen == 0) {
-				woff = 0;
-				wlen = read(infd, wbuf, sizeof(wbuf));
+			if (winfo->len == 0) {
+				winfo->off = 0;
+				winfo->len = read(infd, winfo->buf, sizeof(winfo->buf));
 			}
 
-			if (wlen > 0) {
+			if (winfo->len > 0) {
 				ssize_t bw;
 
-				bw = do_rnd_write(peerfd, wbuf + woff, wlen);
-				if (bw < 0)
+				/* limit the total amount of written data to the trunc value */
+				if (cfg_truncate > 0 && winfo->len + total_wlen > cfg_truncate)
+					winfo->len = cfg_truncate - total_wlen;
+
+				bw = do_rnd_write(peerfd, winfo->buf + winfo->off, winfo->len);
+				if (bw < 0) {
+					if (cfg_rcv_trunc)
+						return 0;
+					perror("write");
 					return 111;
+				}
 
-				woff += bw;
-				wlen -= bw;
-			} else if (wlen == 0) {
+				winfo->off += bw;
+				winfo->len -= bw;
+				total_wlen += bw;
+			} else if (winfo->len == 0) {
 				/* We have no more data to send. */
 				fds.events &= ~POLLOUT;
 
@@ -411,14 +711,7 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd)
 					/* ... and peer also closed already */
 					break;
 
-				/* ... but we still receive.
-				 * Close our write side, ev. give some time
-				 * for address notification and/or checking
-				 * the current status
-				 */
-				if (cfg_wait)
-					usleep(cfg_wait);
-				shutdown(peerfd, SHUT_WR);
+				shut_wr(peerfd);
 			} else {
 				if (errno == EINTR)
 					continue;
@@ -428,17 +721,22 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd)
 		}
 
 		if (fds.revents & (POLLERR | POLLNVAL)) {
+			if (cfg_rcv_trunc)
+				return 0;
 			fprintf(stderr, "Unexpected revents: "
 				"POLLERR/POLLNVAL(%x)\n", fds.revents);
 			return 5;
 		}
+
+		if (cfg_truncate > 0 && total_wlen >= cfg_truncate &&
+		    total_rlen >= cfg_truncate)
+			break;
 	}
 
 	/* leave some time for late join/announce */
-	if (cfg_join || cfg_remove)
+	if (cfg_remove && !quit)
 		usleep(cfg_wait);
 
-	close(peerfd);
 	return 0;
 }
 
@@ -461,10 +759,26 @@ static int do_recvfile(int infd, int outfd)
 	return (int)r;
 }
 
-static int do_mmap(int infd, int outfd, unsigned int size)
+static int spool_buf(int fd, struct wstate *winfo)
+{
+	while (winfo->len) {
+		int ret = write(fd, winfo->buf + winfo->off, winfo->len);
+
+		if (ret < 0) {
+			perror("write");
+			return 4;
+		}
+		winfo->off += ret;
+		winfo->len -= ret;
+	}
+	return 0;
+}
+
+static int do_mmap(int infd, int outfd, unsigned int size,
+		   struct wstate *winfo)
 {
 	char *inbuf = mmap(NULL, size, PROT_READ, MAP_SHARED, infd, 0);
-	ssize_t ret = 0, off = 0;
+	ssize_t ret = 0, off = winfo->total_len;
 	size_t rem;
 
 	if (inbuf == MAP_FAILED) {
@@ -472,7 +786,11 @@ static int do_mmap(int infd, int outfd, unsigned int size)
 		return 1;
 	}
 
-	rem = size;
+	ret = spool_buf(outfd, winfo);
+	if (ret < 0)
+		return ret;
+
+	rem = size - winfo->total_len;
 
 	while (rem > 0) {
 		ret = write(outfd, inbuf + off, rem);
@@ -516,8 +834,16 @@ static int get_infd_size(int fd)
 	return (int)count;
 }
 
-static int do_sendfile(int infd, int outfd, unsigned int count)
+static int do_sendfile(int infd, int outfd, unsigned int count,
+		       struct wstate *winfo)
 {
+	int ret = spool_buf(outfd, winfo);
+
+	if (ret < 0)
+		return ret;
+
+	count -= winfo->total_len;
+
 	while (count > 0) {
 		ssize_t r;
 
@@ -534,7 +860,8 @@ static int do_sendfile(int infd, int outfd, unsigned int count)
 }
 
 static int copyfd_io_mmap(int infd, int peerfd, int outfd,
-			  unsigned int size)
+			  unsigned int size, bool *in_closed_after_out,
+			  struct wstate *winfo)
 {
 	int err;
 
@@ -543,22 +870,23 @@ static int copyfd_io_mmap(int infd, int peerfd, int outfd,
 		if (err)
 			return err;
 
-		err = do_mmap(infd, peerfd, size);
+		err = do_mmap(infd, peerfd, size, winfo);
 	} else {
-		err = do_mmap(infd, peerfd, size);
+		err = do_mmap(infd, peerfd, size, winfo);
 		if (err)
 			return err;
 
-		shutdown(peerfd, SHUT_WR);
+		shut_wr(peerfd);
 
 		err = do_recvfile(peerfd, outfd);
+		*in_closed_after_out = true;
 	}
 
 	return err;
 }
 
 static int copyfd_io_sendfile(int infd, int peerfd, int outfd,
-			      unsigned int size)
+			      unsigned int size, bool *in_closed_after_out, struct wstate *winfo)
 {
 	int err;
 
@@ -567,40 +895,85 @@ static int copyfd_io_sendfile(int infd, int peerfd, int outfd,
 		if (err)
 			return err;
 
-		err = do_sendfile(infd, peerfd, size);
+		err = do_sendfile(infd, peerfd, size, winfo);
 	} else {
-		err = do_sendfile(infd, peerfd, size);
+		err = do_sendfile(infd, peerfd, size, winfo);
 		if (err)
 			return err;
+
+		shut_wr(peerfd);
+
 		err = do_recvfile(peerfd, outfd);
+		*in_closed_after_out = true;
 	}
 
 	return err;
 }
 
-static int copyfd_io(int infd, int peerfd, int outfd)
+static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd, struct wstate *winfo)
 {
+	bool in_closed_after_out = false;
+	struct timespec start, end;
 	int file_size;
+	int ret;
+
+	if (cfg_time && (clock_gettime(CLOCK_MONOTONIC, &start) < 0))
+		xerror("can not fetch start time %d", errno);
 
 	switch (cfg_mode) {
 	case CFG_MODE_POLL:
-		return copyfd_io_poll(infd, peerfd, outfd);
+		ret = copyfd_io_poll(infd, peerfd, outfd, &in_closed_after_out,
+				     winfo);
+		break;
+
 	case CFG_MODE_MMAP:
 		file_size = get_infd_size(infd);
 		if (file_size < 0)
 			return file_size;
-		return copyfd_io_mmap(infd, peerfd, outfd, file_size);
+		ret = copyfd_io_mmap(infd, peerfd, outfd, file_size,
+				     &in_closed_after_out, winfo);
+		break;
+
 	case CFG_MODE_SENDFILE:
 		file_size = get_infd_size(infd);
 		if (file_size < 0)
 			return file_size;
-		return copyfd_io_sendfile(infd, peerfd, outfd, file_size);
+		ret = copyfd_io_sendfile(infd, peerfd, outfd, file_size,
+					 &in_closed_after_out, winfo);
+		break;
+
+	default:
+		fprintf(stderr, "Invalid mode %d\n", cfg_mode);
+
+		die_usage();
+		return 1;
 	}
 
-	fprintf(stderr, "Invalid mode %d\n", cfg_mode);
+	if (ret)
+		return ret;
 
-	die_usage();
-	return 1;
+	if (close_peerfd)
+		close(peerfd);
+
+	if (cfg_time) {
+		unsigned int delta_ms;
+
+		if (clock_gettime(CLOCK_MONOTONIC, &end) < 0)
+			xerror("can not fetch end time %d", errno);
+		delta_ms = (end.tv_sec - start.tv_sec) * 1000 + (end.tv_nsec - start.tv_nsec) / 1000000;
+		if (delta_ms > cfg_time) {
+			xerror("transfer slower than expected! runtime %d ms, expected %d ms",
+			       delta_ms, cfg_time);
+		}
+
+		/* show the runtime only if this end shutdown(wr) before receiving the EOF,
+		 * (that is, if this end got the longer runtime)
+		 */
+		if (in_closed_after_out)
+			fprintf(stderr, "%d", delta_ms);
+	}
+
+	return 0;
 }
 
 static void check_sockaddr(int pf, struct sockaddr_storage *ss,
@@ -693,17 +1066,20 @@ static void maybe_close(int fd)
 {
 	unsigned int r = rand();
 
-	if (!(cfg_join || cfg_remove) && (r & 1))
+	if (!(cfg_join || cfg_remove || cfg_repeat > 1) && (r & 1))
 		close(fd);
 }
 
 int main_loop_s(int listensock)
 {
 	struct sockaddr_storage ss;
+	struct wstate winfo;
 	struct pollfd polls;
 	socklen_t salen;
 	int remotesock;
+	int fd = 0;
 
+again:
 	polls.fd = listensock;
 	polls.events = POLLIN;
 
@@ -724,47 +1100,206 @@ int main_loop_s(int listensock)
 		check_sockaddr(pf, &ss, salen);
 		check_getpeername(remotesock, &ss, salen);
 
-		return copyfd_io(0, remotesock, 1);
+		if (cfg_input) {
+			fd = open(cfg_input, O_RDONLY);
+			if (fd < 0)
+				xerror("can't open %s: %d", cfg_input, errno);
+		}
+
+		SOCK_TEST_TCPULP(remotesock, 0);
+
+		memset(&winfo, 0, sizeof(winfo));
+		copyfd_io(fd, remotesock, 1, true, &winfo);
+	} else {
+		perror("accept");
+		return 1;
 	}
 
-	perror("accept");
+	if (--cfg_repeat > 0) {
+		if (cfg_input)
+			close(fd);
+		goto again;
+	}
 
-	return 1;
+	return 0;
 }
 
 static void init_rng(void)
 {
-	int fd = open("/dev/urandom", O_RDONLY);
 	unsigned int foo;
 
-	if (fd > 0) {
-		int ret = read(fd, &foo, sizeof(foo));
-
-		if (ret < 0)
-			srand(fd + foo);
-		close(fd);
+	if (getrandom(&foo, sizeof(foo), 0) == -1) {
+		perror("getrandom");
+		exit(1);
 	}
 
 	srand(foo);
 }
 
+static void xsetsockopt(int fd, int level, int optname, const void *optval, socklen_t optlen)
+{
+	int err;
+
+	err = setsockopt(fd, level, optname, optval, optlen);
+	if (err) {
+		perror("setsockopt");
+		exit(1);
+	}
+}
+
+static void apply_cmsg_types(int fd, const struct cfg_cmsg_types *cmsg)
+{
+	static const unsigned int on = 1;
+
+	if (cmsg->timestampns)
+		xsetsockopt(fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW, &on, sizeof(on));
+	if (cmsg->tcp_inq)
+		xsetsockopt(fd, IPPROTO_TCP, TCP_INQ, &on, sizeof(on));
+}
+
+static void parse_cmsg_types(const char *type)
+{
+	char *next = strchr(type, ',');
+	unsigned int len = 0;
+
+	cfg_cmsg_types.cmsg_enabled = 1;
+
+	if (next) {
+		parse_cmsg_types(next + 1);
+		len = next - type;
+	} else {
+		len = strlen(type);
+	}
+
+	if (strncmp(type, "TIMESTAMPNS", len) == 0) {
+		cfg_cmsg_types.timestampns = 1;
+		return;
+	}
+
+	if (strncmp(type, "TCPINQ", len) == 0) {
+		cfg_cmsg_types.tcp_inq = 1;
+		return;
+	}
+
+	fprintf(stderr, "Unrecognized cmsg option %s\n", type);
+	exit(1);
+}
+
+static void parse_setsock_options(const char *name)
+{
+	char *next = strchr(name, ',');
+	unsigned int len = 0;
+
+	if (next) {
+		parse_setsock_options(next + 1);
+		len = next - name;
+	} else {
+		len = strlen(name);
+	}
+
+	if (strncmp(name, "TRANSPARENT", len) == 0) {
+		cfg_sockopt_types.transparent = 1;
+		return;
+	}
+
+	if (strncmp(name, "MPTFO", len) == 0) {
+		cfg_sockopt_types.mptfo = 1;
+		return;
+	}
+
+	fprintf(stderr, "Unrecognized setsockopt option %s\n", name);
+	exit(1);
+}
+
+void xdisconnect(int fd, int addrlen)
+{
+	struct sockaddr_storage empty;
+	int msec_sleep = 10;
+	int queued = 1;
+	int i;
+
+	shutdown(fd, SHUT_WR);
+
+	/* while until the pending data is completely flushed, the later
+	 * disconnect will bypass/ignore/drop any pending data.
+	 */
+	for (i = 0; ; i += msec_sleep) {
+		if (ioctl(fd, SIOCOUTQ, &queued) < 0)
+			xerror("can't query out socket queue: %d", errno);
+
+		if (!queued)
+			break;
+
+		if (i > poll_timeout)
+			xerror("timeout while waiting for spool to complete");
+		usleep(msec_sleep * 1000);
+	}
+
+	memset(&empty, 0, sizeof(empty));
+	empty.ss_family = AF_UNSPEC;
+	if (connect(fd, (struct sockaddr *)&empty, addrlen) < 0)
+		xerror("can't disconnect: %d", errno);
+}
+
 int main_loop(void)
 {
-	int fd;
+	int fd = 0, ret, fd_in = 0;
+	struct addrinfo *peer;
+	struct wstate winfo;
 
-	/* listener is ready. */
-	fd = sock_connect_mptcp(cfg_host, cfg_port, cfg_sock_proto);
+	if (cfg_input && cfg_sockopt_types.mptfo) {
+		fd_in = open(cfg_input, O_RDONLY);
+		if (fd < 0)
+			xerror("can't open %s:%d", cfg_input, errno);
+	}
+
+	memset(&winfo, 0, sizeof(winfo));
+	fd = sock_connect_mptcp(cfg_host, cfg_port, cfg_sock_proto, &peer, fd_in, &winfo);
 	if (fd < 0)
 		return 2;
 
+again:
 	check_getpeername_connect(fd);
 
+	SOCK_TEST_TCPULP(fd, cfg_sock_proto);
+
 	if (cfg_rcvbuf)
 		set_rcvbuf(fd, cfg_rcvbuf);
 	if (cfg_sndbuf)
 		set_sndbuf(fd, cfg_sndbuf);
+	if (cfg_cmsg_types.cmsg_enabled)
+		apply_cmsg_types(fd, &cfg_cmsg_types);
 
-	return copyfd_io(0, fd, 1);
+	if (cfg_input && !cfg_sockopt_types.mptfo) {
+		fd_in = open(cfg_input, O_RDONLY);
+		if (fd < 0)
+			xerror("can't open %s:%d", cfg_input, errno);
+	}
+
+	ret = copyfd_io(fd_in, fd, 1, 0, &winfo);
+	if (ret)
+		return ret;
+
+	if (cfg_truncate > 0) {
+		xdisconnect(fd, peer->ai_addrlen);
+	} else if (--cfg_repeat > 0) {
+		xdisconnect(fd, peer->ai_addrlen);
+
+		/* the socket could be unblocking at this point, we need the
+		 * connect to be blocking
+		 */
+		set_nonblock(fd, false);
+		if (connect(fd, peer->ai_addr, peer->ai_addrlen))
+			xerror("can't reconnect: %d", errno);
+		if (cfg_input)
+			close(fd_in);
+		memset(&winfo, 0, sizeof(winfo));
+		goto again;
+	} else {
+		close(fd);
+	}
+
+	return 0;
 }
 
 int parse_proto(const char *proto)
@@ -802,6 +1337,26 @@ int parse_mode(const char *mode)
 	return 0;
 }
 
+int parse_peek(const char *mode)
+{
+	if (!strcasecmp(mode, "saveWithPeek"))
+		return CFG_WITH_PEEK;
+	if (!strcasecmp(mode, "saveAfterPeek"))
+		return CFG_AFTER_PEEK;
+
+	fprintf(stderr, "Unknown: %s\n", mode);
+	fprintf(stderr, "Supported MSG_PEEK mode are:\n");
+	fprintf(stderr,
+		"\t\t\"saveWithPeek\" - recv data with flags 'MSG_PEEK' and save the peek data into file\n");
+	fprintf(stderr,
+		"\t\t\"saveAfterPeek\" - read and save data into file after recv with flags 'MSG_PEEK'\n");
+
+	die_usage();
+
+	/* silence compiler warning */
+	return 0;
+}
+
 static int parse_int(const char *size)
 {
 	unsigned long s;
@@ -829,17 +1384,36 @@ static void parse_opts(int argc, char **argv)
 {
 	int c;
 
-	while ((c = getopt(argc, argv, "6jrlp:s:hut:m:S:R:w:")) != -1) {
+	while ((c = getopt(argc, argv, "6c:f:hi:I:jlm:M:o:p:P:r:R:s:S:t:T:w:")) != -1) {
 		switch (c) {
+		case 'f':
+			cfg_truncate = atoi(optarg);
+
+			/* when receiving a fastclose, ignore PIPE signals and
+			 * all the I/O errors later in the code
+			 */
+			if (cfg_truncate < 0) {
+				cfg_rcv_trunc = true;
+				signal(SIGPIPE, handle_signal);
+			}
+			break;
 		case 'j':
 			cfg_join = true;
 			cfg_mode = CFG_MODE_POLL;
-			cfg_wait = 400000;
 			break;
 		case 'r':
 			cfg_remove = true;
 			cfg_mode = CFG_MODE_POLL;
 			cfg_wait = 400000;
+			cfg_do_w = atoi(optarg);
+			if (cfg_do_w <= 0)
+				cfg_do_w = 50;
+			break;
+		case 'i':
+			cfg_input = optarg;
+			break;
+		case 'I':
+			cfg_repeat = atoi(optarg);
 			break;
 		case 'l':
 			listen_mode = true;
@@ -853,9 +1427,6 @@ static void parse_opts(int argc, char **argv)
 		case 'h':
 			die_usage();
 			break;
-		case 'u':
-			tcpulp_audit = true;
-			break;
 		case '6':
 			pf = AF_INET6;
 			break;
@@ -864,6 +1435,9 @@ static void parse_opts(int argc, char **argv)
 			if (poll_timeout <= 0)
 				poll_timeout = -1;
 			break;
+		case 'T':
+			cfg_time = atoi(optarg);
+			break;
 		case 'm':
 			cfg_mode = parse_mode(optarg);
 			break;
@@ -876,6 +1450,18 @@ static void parse_opts(int argc, char **argv)
 		case 'w':
 			cfg_wait = atoi(optarg)*1000000;
 			break;
+		case 'M':
+			cfg_mark = strtol(optarg, NULL, 0);
+			break;
+		case 'P':
+			cfg_peek = parse_peek(optarg);
+			break;
+		case 'c':
+			parse_cmsg_types(optarg);
+			break;
+		case 'o':
+			parse_setsock_options(optarg);
+			break;
 		}
 	}
 
@@ -894,9 +1480,6 @@ int main(int argc, char *argv[])
 	signal(SIGUSR1, handle_signal);
 	parse_opts(argc, argv);
 
-	if (tcpulp_audit)
-		return sock_test_tcpulp(cfg_host, cfg_port) ? 0 : 1;
-
 	if (listen_mode) {
 		int fd = sock_listen_mptcp(cfg_host, cfg_port);
 
@@ -907,6 +1490,10 @@ int main(int argc, char *argv[])
 			set_rcvbuf(fd, cfg_rcvbuf);
 		if (cfg_sndbuf)
 			set_sndbuf(fd, cfg_sndbuf);
+		if (cfg_mark)
+			set_mark(fd, cfg_mark);
+		if (cfg_cmsg_types.cmsg_enabled)
+			apply_cmsg_types(fd, &cfg_cmsg_types);
 
 		return main_loop_s(fd);
 	}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index 2cfd87d94db8..4c4248554826 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -1,17 +1,26 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
+# Double quotes to prevent globbing and word splitting is recommended in new
+# code but we accept it, especially because there were too many before having
+# address all other issues detected by shellcheck.
+#shellcheck disable=SC2086
+
+. "$(dirname "${0}")/mptcp_lib.sh"
+
 time_start=$(date +%s)
 
-optstring="S:R:d:e:l:r:h4cm:f:t"
+optstring="S:R:d:e:l:r:h4cm:f:tC"
 ret=0
+final_ret=0
 sin=""
 sout=""
+cin_disconnect=""
 cin=""
 cout=""
-ksft_skip=4
 capture=false
-timeout=30
+timeout_poll=30
+timeout_test=$((timeout_poll * 2 + 1))
 ipv6=true
 ethtool_random_on=true
 tc_delay="$((RANDOM%50))"
@@ -21,7 +30,10 @@ sndbuf=0
 rcvbuf=0
 options_log=true
 do_tcp=0
+checksum=false
 filesize=0
+connect_per_transfer=1
+port=$((10000 - 1))
 
 if [ $tc_loss -eq 100 ];then
 	tc_loss=1%
@@ -46,20 +58,21 @@ usage() {
 	echo -e "\t-R: set rcvbuf value (default: use kernel default)"
 	echo -e "\t-m: test mode (poll, sendfile; default: poll)"
 	echo -e "\t-t: also run tests with TCP (use twice to non-fallback tcp)"
+	echo -e "\t-C: enable the MPTCP data checksum"
 }
 
 while getopts "$optstring" option;do
 	case "$option" in
 	"h")
 		usage $0
-		exit 0
+		exit ${KSFT_PASS}
 		;;
 	"d")
 		if [ $OPTARG -ge 0 ];then
 			tc_delay="$OPTARG"
 		else
 			echo "-d requires numeric argument, got \"$OPTARG\"" 1>&2
-			exit 1
+			exit ${KSFT_FAIL}
 		fi
 		;;
 	"e")
@@ -83,7 +96,7 @@ while getopts "$optstring" option;do
 			sndbuf="$OPTARG"
 		else
 			echo "-S requires numeric argument, got \"$OPTARG\"" 1>&2
-			exit 1
+			exit ${KSFT_FAIL}
 		fi
 		;;
 	"R")
@@ -91,7 +104,7 @@ while getopts "$optstring" option;do
 			rcvbuf="$OPTARG"
 		else
 			echo "-R requires numeric argument, got \"$OPTARG\"" 1>&2
-			exit 1
+			exit ${KSFT_FAIL}
 		fi
 		;;
 	"m")
@@ -103,51 +116,49 @@ while getopts "$optstring" option;do
 	"t")
 		do_tcp=$((do_tcp+1))
 		;;
+	"C")
+		checksum=true
+		;;
 	"?")
 		usage $0
-		exit 1
+		exit ${KSFT_FAIL}
 		;;
 	esac
 done
 
-sec=$(date +%s)
-rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
-ns1="ns1-$rndh"
-ns2="ns2-$rndh"
-ns3="ns3-$rndh"
-ns4="ns4-$rndh"
+ns1=""
+ns2=""
+ns3=""
+ns4=""
 
-TEST_COUNT=0
+TEST_GROUP=""
 
+# This function is used in the cleanup trap
+#shellcheck disable=SC2317
 cleanup()
 {
+	rm -f "$cin_disconnect" "$cout_disconnect"
 	rm -f "$cin" "$cout"
 	rm -f "$sin" "$sout"
 	rm -f "$capout"
 
-	local netns
-	for netns in "$ns1" "$ns2" "$ns3" "$ns4";do
-		ip netns del $netns
-	done
+	mptcp_lib_ns_exit "${ns1}" "${ns2}" "${ns3}" "${ns4}"
 }
 
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
-	echo "SKIP: Could not run test without ip tool"
-	exit $ksft_skip
-fi
+mptcp_lib_check_mptcp
+mptcp_lib_check_kallsyms
+mptcp_lib_check_tools ip
 
 sin=$(mktemp)
 sout=$(mktemp)
 cin=$(mktemp)
 cout=$(mktemp)
 capout=$(mktemp)
+cin_disconnect="$cin".disconnect
+cout_disconnect="$cout".disconnect
 trap cleanup EXIT
 
-for i in "$ns1" "$ns2" "$ns3" "$ns4";do
-	ip netns add $i || exit $ksft_skip
-	ip -net $i link set lo up
-done
+mptcp_lib_ns_init ns1 ns2 ns3 ns4
 
 #  "$ns1"              ns2                    ns3                     ns4
 # ns1eth2    ns2eth1   ns2eth3      ns3eth2   ns3eth4       ns4eth3
@@ -195,16 +206,20 @@ ip -net "$ns4" link set ns4eth3 up
 ip -net "$ns4" route add default via 10.0.3.2
 ip -net "$ns4" route add default via dead:beef:3::2
 
-# use TCP syn cookies, even if no flooding was detected.
-ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2
+if $checksum; then
+	for i in "$ns1" "$ns2" "$ns3" "$ns4";do
+		ip netns exec $i sysctl -q net.mptcp.checksum_enabled=1
+	done
+fi
 
 set_ethtool_flags() {
 	local ns="$1"
 	local dev="$2"
 	local flags="$3"
 
-	ip netns exec $ns ethtool -K $dev $flags 2>/dev/null
-	[ $? -eq 0 ] && echo "INFO: set $ns dev $dev: ethtool -K $flags"
+	if ip netns exec $ns ethtool -K $dev $flags 2>/dev/null; then
+		mptcp_lib_pr_info "set $ns dev $dev: ethtool -K $flags"
+	fi
 }
 
 set_random_ethtool_flags() {
@@ -232,100 +247,62 @@ else
 	set_ethtool_flags "$ns4" ns4eth3 "$ethtool_args"
 fi
 
-print_file_err()
-{
-	ls -l "$1" 1>&2
-	echo "Trailing bytes are: "
-	tail -c 27 "$1"
-}
-
-check_transfer()
-{
-	local in=$1
-	local out=$2
-	local what=$3
-
-	cmp "$in" "$out" > /dev/null 2>&1
-	if [ $? -ne 0 ] ;then
-		echo "[ FAIL ] $what does not match (in, out):"
-		print_file_err "$in"
-		print_file_err "$out"
-
-		return 1
-	fi
-
-	return 0
+print_larger_title() {
+	# here we don't have the time, a bit longer for the alignment
+	MPTCP_LIB_TEST_FORMAT="%02u %-69s" \
+		mptcp_lib_print_title "${@}"
 }
 
 check_mptcp_disabled()
 {
 	local disabled_ns
-	disabled_ns="ns_disabled-$sech-$(mktemp -u XXXXXX)"
-	ip netns add ${disabled_ns} || exit $ksft_skip
+	mptcp_lib_ns_init disabled_ns
 
+	print_larger_title "New MPTCP socket can be blocked via sysctl"
 	# net.mptcp.enabled should be enabled by default
 	if [ "$(ip netns exec ${disabled_ns} sysctl net.mptcp.enabled | awk '{ print $3 }')" -ne 1 ]; then
-		echo -e "net.mptcp.enabled sysctl is not 1 by default\t\t[ FAIL ]"
-		ret=1
+		mptcp_lib_pr_fail "net.mptcp.enabled sysctl is not 1 by default"
+		mptcp_lib_result_fail "net.mptcp.enabled sysctl is not 1 by default"
+		ret=${KSFT_FAIL}
 		return 1
 	fi
 	ip netns exec ${disabled_ns} sysctl -q net.mptcp.enabled=0
 
 	local err=0
-	LANG=C ip netns exec ${disabled_ns} ./mptcp_connect -t $timeout -p 10000 -s MPTCP 127.0.0.1 < "$cin" 2>&1 | \
+	LC_ALL=C ip netns exec ${disabled_ns} ./mptcp_connect -p 10000 -s MPTCP 127.0.0.1 < "$cin" 2>&1 | \
 		grep -q "^socket: Protocol not available$" && err=1
-	ip netns delete ${disabled_ns}
+	mptcp_lib_ns_exit "${disabled_ns}"
 
 	if [ ${err} -eq 0 ]; then
-		echo -e "New MPTCP socket cannot be blocked via sysctl\t\t[ FAIL ]"
-		ret=1
+		mptcp_lib_pr_fail "New MPTCP socket cannot be blocked via sysctl"
+		mptcp_lib_result_fail "New MPTCP socket cannot be blocked via sysctl"
+		ret=${KSFT_FAIL}
 		return 1
 	fi
 
-	echo -e "New MPTCP socket can be blocked via sysctl\t\t[ OK ]"
+	mptcp_lib_pr_ok
+	mptcp_lib_result_pass "New MPTCP socket can be blocked via sysctl"
 	return 0
 }
 
-check_mptcp_ulp_setsockopt()
-{
-	local t retval
-	t="ns_ulp-$sech-$(mktemp -u XXXXXX)"
-
-	ip netns add ${t} || exit $ksft_skip
-	if ! ip netns exec ${t} ./mptcp_connect -u -p 10000 -s TCP 127.0.0.1 2>&1; then
-		printf "setsockopt(..., TCP_ULP, \"mptcp\", ...) allowed\t[ FAIL ]\n"
-		retval=1
-		ret=$retval
-	else
-		printf "setsockopt(..., TCP_ULP, \"mptcp\", ...) blocked\t[ OK ]\n"
-		retval=0
-	fi
-	ip netns del ${t}
-	return $retval
-}
-
-# $1: IP address
-is_v6()
-{
-	[ -z "${1##*:*}" ]
-}
-
 do_ping()
 {
 	local listener_ns="$1"
 	local connector_ns="$2"
 	local connect_addr="$3"
 	local ping_args="-q -c 1"
+	local rc=0
 
-	if is_v6 "${connect_addr}"; then
+	if mptcp_lib_is_v6 "${connect_addr}"; then
 		$ipv6 || return 0
 		ping_args="${ping_args} -6"
 	fi
 
-	ip netns exec ${connector_ns} ping ${ping_args} $connect_addr >/dev/null
-	if [ $? -ne 0 ] ; then
-		echo "$listener_ns -> $connect_addr connectivity [ FAIL ]" 1>&2
-		ret=1
+	ip netns exec ${connector_ns} ping ${ping_args} $connect_addr >/dev/null || rc=1
+
+	if [ $rc -ne 0 ] ; then
+		mptcp_lib_pr_fail "$listener_ns -> $connect_addr connectivity"
+		ret=${KSFT_FAIL}
 
 		return 1
 	fi
@@ -333,23 +310,6 @@ do_ping()
 	return 0
 }
 
-# $1: ns, $2: port
-wait_local_port_listen()
-{
-	local listener_ns="${1}"
-	local port="${2}"
-
-	local port_hex i
-
-	port_hex="$(printf "%04X" "${port}")"
-	for i in $(seq 10); do
-		ip netns exec "${listener_ns}" cat /proc/net/tcp* | \
-			awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" &&
-			break
-		sleep 0.1
-	done
-}
-
 do_transfer()
 {
 	local listener_ns="$1"
@@ -358,28 +318,26 @@ do_transfer()
 	local srv_proto="$4"
 	local connect_addr="$5"
 	local local_addr="$6"
-	local extra_args=""
+	local extra_args="$7"
 
-	local port
-	port=$((10000+$TEST_COUNT))
-	TEST_COUNT=$((TEST_COUNT+1))
+	port=$((port + 1))
 
 	if [ "$rcvbuf" -gt 0 ]; then
-		extra_args="$extra_args -R $rcvbuf"
+		extra_args+=" -R $rcvbuf"
 	fi
 
 	if [ "$sndbuf" -gt 0 ]; then
-		extra_args="$extra_args -S $sndbuf"
+		extra_args+=" -S $sndbuf"
 	fi
 
 	if [ -n "$testmode" ]; then
-		extra_args="$extra_args -m $testmode"
+		extra_args+=" -m $testmode"
 	fi
 
 	if [ -n "$extra_args" ] && $options_log; then
-		options_log=false
-		echo "INFO: extra options: $extra_args"
+		mptcp_lib_pr_info "extra options: $extra_args"
 	fi
+	options_log=false
 
 	:> "$cout"
 	:> "$sout"
@@ -387,10 +345,13 @@ do_transfer()
 
 	local addr_port
 	addr_port=$(printf "%s:%d" ${connect_addr} ${port})
-	printf "%.3s %-5s -> %.3s (%-20s) %-5s\t" ${connector_ns} ${cl_proto} ${listener_ns} ${addr_port} ${srv_proto}
+	local result_msg
+	result_msg="$(printf "%.3s %-5s -> %.3s (%-20s) %-5s" ${connector_ns} ${cl_proto} ${listener_ns} ${addr_port} ${srv_proto})"
+	mptcp_lib_print_title "${result_msg}"
 
 	if $capture; then
 		local capuser
+		local rndh="${connector_ns:4}"
 		if [ -z $SUDO_USER ] ; then
 			capuser=""
 		else
@@ -409,19 +370,40 @@ do_transfer()
 		sleep 1
 	fi
 
-	local stat_synrx_last_l=$(ip netns exec ${listener_ns} nstat -z -a MPTcpExtMPCapableSYNRX | while read a count c rest ;do  echo $count;done)
-	local stat_ackrx_last_l=$(ip netns exec ${listener_ns} nstat -z -a MPTcpExtMPCapableACKRX | while read a count c rest ;do  echo $count;done)
-	local stat_cookietx_last=$(ip netns exec ${listener_ns} nstat -z -a TcpExtSyncookiesSent | while read a count c rest ;do  echo $count;done)
-	local stat_cookierx_last=$(ip netns exec ${listener_ns} nstat -z -a TcpExtSyncookiesRecv | while read a count c rest ;do  echo $count;done)
+	NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
+		nstat -n
+	if [ ${listener_ns} != ${connector_ns} ]; then
+		NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
+			nstat -n
+	fi
 
-	ip netns exec ${listener_ns} ./mptcp_connect -t $timeout -l -p $port -s ${srv_proto} $extra_args $local_addr < "$sin" > "$sout" &
+	local stat_synrx_last_l
+	local stat_ackrx_last_l
+	local stat_cookietx_last
+	local stat_cookierx_last
+	local stat_csum_err_s
+	local stat_csum_err_c
+	stat_synrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
+	stat_ackrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
+	stat_cookietx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
+	stat_cookierx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
+	stat_csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr")
+	stat_csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr")
+
+	timeout ${timeout_test} \
+		ip netns exec ${listener_ns} \
+			./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
+				$extra_args $local_addr < "$sin" > "$sout" &
 	local spid=$!
 
-	wait_local_port_listen "${listener_ns}" "${port}"
+	mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}"
 
 	local start
 	start=$(date +%s%3N)
-	ip netns exec ${connector_ns} ./mptcp_connect -t $timeout -p $port -s ${cl_proto} $extra_args $connect_addr < "$cin" > "$cout" &
+	timeout ${timeout_test} \
+		ip netns exec ${connector_ns} \
+			./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
+				$extra_args $connect_addr < "$cin" > "$cout" &
 	local cpid=$!
 
 	wait $cpid
@@ -438,30 +420,48 @@ do_transfer()
 		kill ${cappid_connector}
 	fi
 
+	NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
+		nstat | grep Tcp > /tmp/${listener_ns}.out
+	if [ ${listener_ns} != ${connector_ns} ]; then
+		NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
+			nstat | grep Tcp > /tmp/${connector_ns}.out
+	fi
+
 	local duration
 	duration=$((stop-start))
-	duration=$(printf "(duration %05sms)" $duration)
+	result_msg+=" # time=${duration}ms"
+	printf "(duration %05sms) " "${duration}"
 	if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
-		echo "$duration [ FAIL ] client exit code $retc, server $rets" 1>&2
+		mptcp_lib_pr_fail "client exit code $retc, server $rets"
 		echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2
-		ip netns exec ${listener_ns} ss -nita 1>&2 -o "sport = :$port"
+		ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port"
+		cat /tmp/${listener_ns}.out
 		echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2
-		ip netns exec ${connector_ns} ss -nita 1>&2 -o "dport = :$port"
+		ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port"
+		[ ${listener_ns} != ${connector_ns} ] && cat /tmp/${connector_ns}.out
 
+		echo
 		cat "$capout"
+		mptcp_lib_result_fail "${TEST_GROUP}: ${result_msg}"
 		return 1
 	fi
 
-	check_transfer $sin $cout "file received by client"
+	mptcp_lib_check_transfer $sin $cout "file received by client"
 	retc=$?
-	check_transfer $cin $sout "file received by server"
+	mptcp_lib_check_transfer $cin $sout "file received by server"
 	rets=$?
 
-	local stat_synrx_now_l=$(ip netns exec ${listener_ns} nstat -z -a MPTcpExtMPCapableSYNRX  | while read a count c rest ;do  echo $count;done)
-	local stat_ackrx_now_l=$(ip netns exec ${listener_ns} nstat -z -a MPTcpExtMPCapableACKRX  | while read a count c rest ;do  echo $count;done)
-
-	local stat_cookietx_now=$(ip netns exec ${listener_ns} nstat -z -a TcpExtSyncookiesSent | while read a count c rest ;do  echo $count;done)
-	local stat_cookierx_now=$(ip netns exec ${listener_ns} nstat -z -a TcpExtSyncookiesRecv | while read a count c rest ;do  echo $count;done)
+	local extra=""
+	local stat_synrx_now_l
+	local stat_ackrx_now_l
+	local stat_cookietx_now
+	local stat_cookierx_now
+	local stat_ooo_now
+	stat_synrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
+	stat_ackrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
+	stat_cookietx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
+	stat_cookierx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
+	stat_ooo_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue")
 
 	expect_synrx=$((stat_synrx_last_l))
 	expect_ackrx=$((stat_ackrx_last_l))
@@ -470,40 +470,81 @@ do_transfer()
 	cookies=${cookies##*=}
 
 	if [ ${cl_proto} = "MPTCP" ] && [ ${srv_proto} = "MPTCP" ]; then
-		expect_synrx=$((stat_synrx_last_l+1))
-		expect_ackrx=$((stat_ackrx_last_l+1))
+		expect_synrx=$((stat_synrx_last_l+connect_per_transfer))
+		expect_ackrx=$((stat_ackrx_last_l+connect_per_transfer))
+	fi
+
+	if [ ${stat_synrx_now_l} -lt ${expect_synrx} ]; then
+		mptcp_lib_pr_fail "lower MPC SYN rx (${stat_synrx_now_l})" \
+				  "than expected (${expect_synrx})"
+		retc=1
+	fi
+	if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ] && [ ${stat_ooo_now} -eq 0 ]; then
+		if [ ${stat_ooo_now} -eq 0 ]; then
+			mptcp_lib_pr_fail "lower MPC ACK rx (${stat_ackrx_now_l})" \
+					  "than expected (${expect_ackrx})"
+			rets=1
+		else
+			extra+=" [ Note ] fallback due to TCP OoO"
+		fi
+	fi
+
+	if $checksum; then
+		local csum_err_s
+		local csum_err_c
+		csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr")
+		csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr")
+
+		local csum_err_s_nr=$((csum_err_s - stat_csum_err_s))
+		if [ $csum_err_s_nr -gt 0 ]; then
+			mptcp_lib_pr_fail "server got ${csum_err_s_nr} data checksum error[s]"
+			rets=1
+		fi
+
+		local csum_err_c_nr=$((csum_err_c - stat_csum_err_c))
+		if [ $csum_err_c_nr -gt 0 ]; then
+			mptcp_lib_pr_fail "client got ${csum_err_c_nr} data checksum error[s]"
+			retc=1
+		fi
 	fi
+
 	if [ $cookies -eq 2 ];then
 		if [ $stat_cookietx_last -ge $stat_cookietx_now ] ;then
-			echo "${listener_ns} CookieSent: ${cl_proto} -> ${srv_proto}: did not advance"
+			extra+=" WARN: CookieSent: did not advance"
 		fi
 		if [ $stat_cookierx_last -ge $stat_cookierx_now ] ;then
-			echo "${listener_ns} CookieRecv: ${cl_proto} -> ${srv_proto}: did not advance"
+			extra+=" WARN: CookieRecv: did not advance"
 		fi
 	else
 		if [ $stat_cookietx_last -ne $stat_cookietx_now ] ;then
-			echo "${listener_ns} CookieSent: ${cl_proto} -> ${srv_proto}: changed"
+			extra+=" WARN: CookieSent: changed"
 		fi
 		if [ $stat_cookierx_last -ne $stat_cookierx_now ] ;then
-			echo "${listener_ns} CookieRecv: ${cl_proto} -> ${srv_proto}: changed"
+			extra+=" WARN: CookieRecv: changed"
 		fi
 	fi
 
-	if [ $expect_synrx -ne $stat_synrx_now_l ] ;then
-		echo "${listener_ns} SYNRX: ${cl_proto} -> ${srv_proto}: expect ${expect_synrx}, got ${stat_synrx_now_l}"
+	if [ ${stat_synrx_now_l} -gt ${expect_synrx} ]; then
+		extra+=" WARN: SYNRX: expect ${expect_synrx},"
+		extra+=" got ${stat_synrx_now_l} (probably retransmissions)"
 	fi
-	if [ $expect_ackrx -ne $stat_ackrx_now_l ] ;then
-		echo "${listener_ns} ACKRX: ${cl_proto} -> ${srv_proto}: expect ${expect_synrx}, got ${stat_synrx_now_l}"
+	if [ ${stat_ackrx_now_l} -gt ${expect_ackrx} ]; then
+		extra+=" WARN: ACKRX: expect ${expect_ackrx},"
+		extra+=" got ${stat_ackrx_now_l} (probably retransmissions)"
 	fi
 
-	if [ $retc -eq 0 ] && [ $rets -eq 0 ];then
-		echo "$duration [ OK ]"
-		cat "$capout"
-		return 0
+	if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then
+		mptcp_lib_pr_ok "${extra:1}"
+		mptcp_lib_result_pass "${TEST_GROUP}: ${result_msg}"
+	else
+		if [ -n "${extra}" ]; then
+			mptcp_lib_print_warn "${extra:1}"
+		fi
+		mptcp_lib_result_fail "${TEST_GROUP}: ${result_msg}"
 	fi
 
 	cat "$capout"
-	return 1
+	[ $retc -eq 0 ] && [ $rets -eq 0 ]
 }
 
 make_file()
@@ -524,9 +565,8 @@ make_file()
 	ksize=$((SIZE / 1024))
 	rem=$((SIZE - (ksize * 1024)))
 
-	dd if=/dev/urandom of="$name" bs=1024 count=$ksize 2> /dev/null
-	dd if=/dev/urandom conv=notrunc of="$name" bs=1 count=$rem 2> /dev/null
-	echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name"
+	mptcp_lib_make_file $name 1024 $ksize
+	dd if=/dev/urandom conv=notrunc of="$name" oflag=append bs=1 count=$rem 2> /dev/null
 
 	echo "Created $name (size $(du -b "$name")) containing data sent by $who"
 }
@@ -537,6 +577,7 @@ run_tests_lo()
 	local connector_ns="$2"
 	local connect_addr="$3"
 	local loopback="$4"
+	local extra_args="$5"
 	local lret=0
 
 	# skip if test programs are running inside same netns for subsequent runs.
@@ -545,18 +586,19 @@ run_tests_lo()
 	fi
 
 	# skip if we don't want v6
-	if ! $ipv6 && is_v6 "${connect_addr}"; then
+	if ! $ipv6 && mptcp_lib_is_v6 "${connect_addr}"; then
 		return 0
 	fi
 
 	local local_addr
-	if is_v6 "${connect_addr}"; then
+	if mptcp_lib_is_v6 "${connect_addr}"; then
 		local_addr="::"
 	else
 		local_addr="0.0.0.0"
 	fi
 
-	do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} ${local_addr}
+	do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP \
+		    ${connect_addr} ${local_addr} "${extra_args}"
 	lret=$?
 	if [ $lret -ne 0 ]; then
 		ret=$lret
@@ -570,14 +612,16 @@ run_tests_lo()
 		fi
 	fi
 
-	do_transfer ${listener_ns} ${connector_ns} MPTCP TCP ${connect_addr} ${local_addr}
+	do_transfer ${listener_ns} ${connector_ns} MPTCP TCP \
+		    ${connect_addr} ${local_addr} "${extra_args}"
 	lret=$?
 	if [ $lret -ne 0 ]; then
 		ret=$lret
 		return 1
 	fi
 
-	do_transfer ${listener_ns} ${connector_ns} TCP MPTCP ${connect_addr} ${local_addr}
+	do_transfer ${listener_ns} ${connector_ns} TCP MPTCP \
+		    ${connect_addr} ${local_addr} "${extra_args}"
 	lret=$?
 	if [ $lret -ne 0 ]; then
 		ret=$lret
@@ -585,7 +629,8 @@ run_tests_lo()
 	fi
 
 	if [ $do_tcp -gt 1 ] ;then
-		do_transfer ${listener_ns} ${connector_ns} TCP TCP ${connect_addr} ${local_addr}
+		do_transfer ${listener_ns} ${connector_ns} TCP TCP \
+			    ${connect_addr} ${local_addr} "${extra_args}"
 		lret=$?
 		if [ $lret -ne 0 ]; then
 			ret=$lret
@@ -601,14 +646,204 @@ run_tests()
 	run_tests_lo $1 $2 $3 0
 }
 
+run_test_transparent()
+{
+	local connect_addr="$1"
+	local msg="$2"
+
+	local connector_ns="$ns1"
+	local listener_ns="$ns2"
+	local lret=0
+	local r6flag=""
+
+	TEST_GROUP="${msg}"
+
+	# skip if we don't want v6
+	if ! $ipv6 && mptcp_lib_is_v6 "${connect_addr}"; then
+		return 0
+	fi
+
+	# IP(V6)_TRANSPARENT has been added after TOS support which came with
+	# the required infrastructure in MPTCP sockopt code. To support TOS, the
+	# following function has been exported (T). Not great but better than
+	# checking for a specific kernel version.
+	if ! mptcp_lib_kallsyms_has "T __ip_sock_set_tos$"; then
+		mptcp_lib_pr_skip "${msg} not supported by the kernel"
+		mptcp_lib_result_skip "${TEST_GROUP}"
+		return
+	fi
+
+	if ! ip netns exec "$listener_ns" nft -f /dev/stdin <<"EOF"
+flush ruleset
+table inet mangle {
+	chain divert {
+		type filter hook prerouting priority -150;
+
+		meta l4proto tcp socket transparent 1 meta mark set 1 accept
+		tcp dport 20000 tproxy to :20000 meta mark set 1 accept
+	}
+}
+EOF
+	then
+		mptcp_lib_pr_skip "$msg, could not load nft ruleset"
+		mptcp_lib_fail_if_expected_feature "nft rules"
+		mptcp_lib_result_skip "${TEST_GROUP}"
+		return
+	fi
+
+	local local_addr
+	if mptcp_lib_is_v6 "${connect_addr}"; then
+		local_addr="::"
+		r6flag="-6"
+	else
+		local_addr="0.0.0.0"
+	fi
+
+	if ! ip -net "$listener_ns" $r6flag rule add fwmark 1 lookup 100; then
+		ip netns exec "$listener_ns" nft flush ruleset
+		mptcp_lib_pr_skip "$msg, ip $r6flag rule failed"
+		mptcp_lib_fail_if_expected_feature "ip rule"
+		mptcp_lib_result_skip "${TEST_GROUP}"
+		return
+	fi
+
+	if ! ip -net "$listener_ns" route add local $local_addr/0 dev lo table 100; then
+		ip netns exec "$listener_ns" nft flush ruleset
+		ip -net "$listener_ns" $r6flag rule del fwmark 1 lookup 100
+		mptcp_lib_pr_skip "$msg, ip route add local $local_addr failed"
+		mptcp_lib_fail_if_expected_feature "ip route"
+		mptcp_lib_result_skip "${TEST_GROUP}"
+		return
+	fi
+
+	mptcp_lib_pr_info "test $msg"
+
+	port=$((20000 - 1))
+	local extra_args="-o TRANSPARENT"
+	do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP \
+		    ${connect_addr} ${local_addr} "${extra_args}"
+	lret=$?
+
+	ip netns exec "$listener_ns" nft flush ruleset
+	ip -net "$listener_ns" $r6flag rule del fwmark 1 lookup 100
+	ip -net "$listener_ns" route del local $local_addr/0 dev lo table 100
+
+	if [ $lret -ne 0 ]; then
+		mptcp_lib_pr_fail "$msg, mptcp connection error"
+		ret=$lret
+		return 1
+	fi
+
+	mptcp_lib_pr_info "$msg pass"
+	return 0
+}
+
+run_tests_peekmode()
+{
+	local peekmode="$1"
+
+	TEST_GROUP="peek mode: ${peekmode}"
+	mptcp_lib_pr_info "with peek mode: ${peekmode}"
+	run_tests_lo "$ns1" "$ns1" 10.0.1.1 1 "-P ${peekmode}"
+	run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-P ${peekmode}"
+}
+
+run_tests_mptfo()
+{
+	TEST_GROUP="MPTFO"
+
+	if ! mptcp_lib_kallsyms_has "mptcp_fastopen_"; then
+		mptcp_lib_pr_skip "TFO not supported by the kernel"
+		mptcp_lib_result_skip "${TEST_GROUP}"
+		return
+	fi
+
+	mptcp_lib_pr_info "with MPTFO start"
+	ip netns exec "$ns1" sysctl -q net.ipv4.tcp_fastopen=2
+	ip netns exec "$ns2" sysctl -q net.ipv4.tcp_fastopen=1
+
+	run_tests_lo "$ns1" "$ns2" 10.0.1.1 0 "-o MPTFO"
+	run_tests_lo "$ns1" "$ns2" 10.0.1.1 0 "-o MPTFO"
+
+	run_tests_lo "$ns1" "$ns2" dead:beef:1::1 0 "-o MPTFO"
+	run_tests_lo "$ns1" "$ns2" dead:beef:1::1 0 "-o MPTFO"
+
+	ip netns exec "$ns1" sysctl -q net.ipv4.tcp_fastopen=0
+	ip netns exec "$ns2" sysctl -q net.ipv4.tcp_fastopen=0
+	mptcp_lib_pr_info "with MPTFO end"
+}
+
+run_tests_disconnect()
+{
+	local old_cin=$cin
+	local old_sin=$sin
+
+	TEST_GROUP="full disconnect"
+
+	if ! mptcp_lib_kallsyms_has "mptcp_pm_data_reset$"; then
+		mptcp_lib_pr_skip "Full disconnect not supported"
+		mptcp_lib_result_skip "${TEST_GROUP}"
+		return
+	fi
+
+	cat $cin $cin $cin > "$cin".disconnect
+
+	# force do_transfer to cope with the multiple transmissions
+	sin="$cin.disconnect"
+	cin="$cin.disconnect"
+	cin_disconnect="$old_cin"
+	connect_per_transfer=3
+
+	mptcp_lib_pr_info "disconnect"
+	run_tests_lo "$ns1" "$ns1" 10.0.1.1 1 "-I 3 -i $old_cin"
+	run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-I 3 -i $old_cin"
+
+	# restore previous status
+	sin=$old_sin
+	cin=$old_cin
+	cin_disconnect="$cin".disconnect
+	connect_per_transfer=1
+}
+
+display_time()
+{
+	time_end=$(date +%s)
+	time_run=$((time_end-time_start))
+
+	echo "Time: ${time_run} seconds"
+}
+
+log_if_error()
+{
+	local msg="$1"
+
+	if [ ${ret} -ne 0 ]; then
+		mptcp_lib_pr_fail "${msg}"
+
+		final_ret=${ret}
+		ret=${KSFT_PASS}
+
+		return ${final_ret}
+	fi
+}
+
+stop_if_error()
+{
+	if ! log_if_error "${@}"; then
+		display_time
+		mptcp_lib_result_print_all_tap
+		exit ${final_ret}
+	fi
+}
+
 make_file "$cin" "client"
 make_file "$sin" "server"
 
 check_mptcp_disabled
 
-check_mptcp_ulp_setsockopt
+stop_if_error "The kernel configuration is not valid for MPTCP"
 
-echo "INFO: validating network environment with pings"
+print_larger_title "Validating network environment with pings"
 for sender in "$ns1" "$ns2" "$ns3" "$ns4";do
 	do_ping "$ns1" $sender 10.0.1.1
 	do_ping "$ns1" $sender dead:beef:1::1
@@ -627,11 +862,16 @@ for sender in "$ns1" "$ns2" "$ns3" "$ns4";do
 	do_ping "$ns4" $sender dead:beef:3::1
 done
 
+mptcp_lib_result_code "${ret}" "ping tests"
+
+stop_if_error "Could not even run ping tests"
+mptcp_lib_pr_ok
+
 [ -n "$tc_loss" ] && tc -net "$ns2" qdisc add dev ns2eth3 root netem loss random $tc_loss delay ${tc_delay}ms
-echo -n "INFO: Using loss of $tc_loss "
-test "$tc_delay" -gt 0 && echo -n "delay $tc_delay ms "
+tc_info="loss of $tc_loss "
+test "$tc_delay" -gt 0 && tc_info+="delay $tc_delay ms "
 
-reorder_delay=$(($tc_delay / 4))
+reorder_delay=$((tc_delay / 4))
 
 if [ -z "${tc_reorder}" ]; then
 	reorder1=$((RANDOM%10))
@@ -640,32 +880,41 @@ if [ -z "${tc_reorder}" ]; then
 
 	if [ $reorder_delay -gt 0 ] && [ $reorder1 -lt 100 ] && [ $reorder2 -gt 0 ]; then
 		tc_reorder="reorder ${reorder1}% ${reorder2}%"
-		echo -n "$tc_reorder with delay ${reorder_delay}ms "
+		tc_info+="$tc_reorder with delay ${reorder_delay}ms "
 	fi
 elif [ "$tc_reorder" = "0" ];then
 	tc_reorder=""
 elif [ "$reorder_delay" -gt 0 ];then
 	# reordering requires some delay
 	tc_reorder="reorder $tc_reorder"
-	echo -n "$tc_reorder with delay ${reorder_delay}ms "
+	tc_info+="$tc_reorder with delay ${reorder_delay}ms "
 fi
 
-echo "on ns3eth4"
+mptcp_lib_pr_info "Using ${tc_info}on ns3eth4"
 
 tc -net "$ns3" qdisc add dev ns3eth4 root netem delay ${reorder_delay}ms $tc_reorder
 
+TEST_GROUP="loopback v4"
+run_tests_lo "$ns1" "$ns1" 10.0.1.1 1
+stop_if_error "Could not even run loopback test"
+
+TEST_GROUP="loopback v6"
+run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1
+stop_if_error "Could not even run loopback v6 test"
+
+TEST_GROUP="multihosts"
 for sender in $ns1 $ns2 $ns3 $ns4;do
-	run_tests_lo "$ns1" "$sender" 10.0.1.1 1
-	if [ $ret -ne 0 ] ;then
-		echo "FAIL: Could not even run loopback test" 1>&2
-		exit $ret
-	fi
-	run_tests_lo "$ns1" $sender dead:beef:1::1 1
-	if [ $ret -ne 0 ] ;then
-		echo "FAIL: Could not even run loopback v6 test" 2>&1
-		exit $ret
+	# ns1<->ns2 is not subject to reordering/tc delays. Use it to test
+	# mptcp syncookie support.
+	if [ $sender = $ns1 ]; then
+		ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2
+	else
+		ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=1
 	fi
 
+	run_tests "$ns1" $sender 10.0.1.1
+	run_tests "$ns1" $sender dead:beef:1::1
+
 	run_tests "$ns2" $sender 10.0.1.2
 	run_tests "$ns2" $sender dead:beef:1::2
 	run_tests "$ns2" $sender 10.0.2.1
@@ -678,11 +927,26 @@ for sender in $ns1 $ns2 $ns3 $ns4;do
 
 	run_tests "$ns4" $sender 10.0.3.1
 	run_tests "$ns4" $sender dead:beef:3::1
+
+	log_if_error "Tests with $sender as a sender have failed"
 done
 
-time_end=$(date +%s)
-time_run=$((time_end-time_start))
+run_tests_peekmode "saveWithPeek"
+run_tests_peekmode "saveAfterPeek"
+log_if_error "Tests with peek mode have failed"
+
+# MPTFO (MultiPath TCP Fatopen tests)
+run_tests_mptfo
+log_if_error "Tests with MPTFO have failed"
+
+# connect to ns4 ip address, ns2 should intercept/proxy
+run_test_transparent 10.0.3.1 "tproxy ipv4"
+run_test_transparent dead:beef:3::1 "tproxy ipv6"
+log_if_error "Tests with tproxy have failed"
 
-echo "Time: ${time_run} seconds"
+run_tests_disconnect
+log_if_error "Tests of the full disconnection have failed"
 
-exit $ret
+display_time
+mptcp_lib_result_print_all_tap
+exit ${final_ret}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_inq.c b/tools/testing/selftests/net/mptcp/mptcp_inq.c
new file mode 100644
index 000000000000..218aac467321
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/mptcp_inq.c
@@ -0,0 +1,599 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <string.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <unistd.h>
+#include <time.h>
+
+#include <sys/ioctl.h>
+#include <sys/random.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include <netdb.h>
+#include <netinet/in.h>
+
+#include <linux/tcp.h>
+#include <linux/sockios.h>
+
+#ifndef IPPROTO_MPTCP
+#define IPPROTO_MPTCP 262
+#endif
+#ifndef SOL_MPTCP
+#define SOL_MPTCP 284
+#endif
+
+static int pf = AF_INET;
+static int proto_tx = IPPROTO_MPTCP;
+static int proto_rx = IPPROTO_MPTCP;
+
+static void die_perror(const char *msg)
+{
+	perror(msg);
+	exit(1);
+}
+
+static void die_usage(int r)
+{
+	fprintf(stderr, "Usage: mptcp_inq [-6] [ -t tcp|mptcp ] [ -r tcp|mptcp]\n");
+	exit(r);
+}
+
+static void xerror(const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+	fputc('\n', stderr);
+	exit(1);
+}
+
+static const char *getxinfo_strerr(int err)
+{
+	if (err == EAI_SYSTEM)
+		return strerror(errno);
+
+	return gai_strerror(err);
+}
+
+static void xgetaddrinfo(const char *node, const char *service,
+			 const struct addrinfo *hints,
+			 struct addrinfo **res)
+{
+	int err = getaddrinfo(node, service, hints, res);
+
+	if (err) {
+		const char *errstr = getxinfo_strerr(err);
+
+		fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n",
+			node ? node : "", service ? service : "", errstr);
+		exit(1);
+	}
+}
+
+static int sock_listen_mptcp(const char * const listenaddr,
+			     const char * const port)
+{
+	int sock = -1;
+	struct addrinfo hints = {
+		.ai_protocol = IPPROTO_TCP,
+		.ai_socktype = SOCK_STREAM,
+		.ai_flags = AI_PASSIVE | AI_NUMERICHOST
+	};
+
+	hints.ai_family = pf;
+
+	struct addrinfo *a, *addr;
+	int one = 1;
+
+	xgetaddrinfo(listenaddr, port, &hints, &addr);
+	hints.ai_family = pf;
+
+	for (a = addr; a; a = a->ai_next) {
+		sock = socket(a->ai_family, a->ai_socktype, proto_rx);
+		if (sock < 0)
+			continue;
+
+		if (-1 == setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &one,
+				     sizeof(one)))
+			perror("setsockopt");
+
+		if (bind(sock, a->ai_addr, a->ai_addrlen) == 0)
+			break; /* success */
+
+		perror("bind");
+		close(sock);
+		sock = -1;
+	}
+
+	freeaddrinfo(addr);
+
+	if (sock < 0)
+		xerror("could not create listen socket");
+
+	if (listen(sock, 20))
+		die_perror("listen");
+
+	return sock;
+}
+
+static int sock_connect_mptcp(const char * const remoteaddr,
+			      const char * const port, int proto)
+{
+	struct addrinfo hints = {
+		.ai_protocol = IPPROTO_TCP,
+		.ai_socktype = SOCK_STREAM,
+	};
+	struct addrinfo *a, *addr;
+	int sock = -1;
+
+	hints.ai_family = pf;
+
+	xgetaddrinfo(remoteaddr, port, &hints, &addr);
+	for (a = addr; a; a = a->ai_next) {
+		sock = socket(a->ai_family, a->ai_socktype, proto);
+		if (sock < 0)
+			continue;
+
+		if (connect(sock, a->ai_addr, a->ai_addrlen) == 0)
+			break; /* success */
+
+		die_perror("connect");
+	}
+
+	if (sock < 0)
+		xerror("could not create connect socket");
+
+	freeaddrinfo(addr);
+	return sock;
+}
+
+static int protostr_to_num(const char *s)
+{
+	if (strcasecmp(s, "tcp") == 0)
+		return IPPROTO_TCP;
+	if (strcasecmp(s, "mptcp") == 0)
+		return IPPROTO_MPTCP;
+
+	die_usage(1);
+	return 0;
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	int c;
+
+	while ((c = getopt(argc, argv, "h6t:r:")) != -1) {
+		switch (c) {
+		case 'h':
+			die_usage(0);
+			break;
+		case '6':
+			pf = AF_INET6;
+			break;
+		case 't':
+			proto_tx = protostr_to_num(optarg);
+			break;
+		case 'r':
+			proto_rx = protostr_to_num(optarg);
+			break;
+		default:
+			die_usage(1);
+			break;
+		}
+	}
+}
+
+/* wait up to timeout milliseconds */
+static void wait_for_ack(int fd, int timeout, size_t total)
+{
+	int i;
+
+	for (i = 0; i < timeout; i++) {
+		int nsd, ret, queued = -1;
+		struct timespec req;
+
+		ret = ioctl(fd, TIOCOUTQ, &queued);
+		if (ret < 0)
+			die_perror("TIOCOUTQ");
+
+		ret = ioctl(fd, SIOCOUTQNSD, &nsd);
+		if (ret < 0)
+			die_perror("SIOCOUTQNSD");
+
+		if ((size_t)queued > total)
+			xerror("TIOCOUTQ %u, but only %zu expected\n", queued, total);
+		assert(nsd <= queued);
+
+		if (queued == 0)
+			return;
+
+		/* wait for peer to ack rx of all data */
+		req.tv_sec = 0;
+		req.tv_nsec = 1 * 1000 * 1000ul; /* 1ms */
+		nanosleep(&req, NULL);
+	}
+
+	xerror("still tx data queued after %u ms\n", timeout);
+}
+
+static void connect_one_server(int fd, int unixfd)
+{
+	size_t len, i, total, sent;
+	char buf[4096], buf2[4096];
+	ssize_t ret;
+
+	len = rand() % (sizeof(buf) - 1);
+
+	if (len < 128)
+		len = 128;
+
+	for (i = 0; i < len ; i++) {
+		buf[i] = rand() % 26;
+		buf[i] += 'A';
+	}
+
+	buf[i] = '\n';
+
+	/* un-block server */
+	ret = read(unixfd, buf2, 4);
+	assert(ret == 4);
+
+	assert(strncmp(buf2, "xmit", 4) == 0);
+
+	ret = write(unixfd, &len, sizeof(len));
+	assert(ret == (ssize_t)sizeof(len));
+
+	ret = write(fd, buf, len);
+	if (ret < 0)
+		die_perror("write");
+
+	if (ret != (ssize_t)len)
+		xerror("short write");
+
+	ret = read(unixfd, buf2, 4);
+	assert(strncmp(buf2, "huge", 4) == 0);
+
+	total = rand() % (16 * 1024 * 1024);
+	total += (1 * 1024 * 1024);
+	sent = total;
+
+	ret = write(unixfd, &total, sizeof(total));
+	assert(ret == (ssize_t)sizeof(total));
+
+	wait_for_ack(fd, 5000, len);
+
+	while (total > 0) {
+		if (total > sizeof(buf))
+			len = sizeof(buf);
+		else
+			len = total;
+
+		ret = write(fd, buf, len);
+		if (ret < 0)
+			die_perror("write");
+		total -= ret;
+
+		/* we don't have to care about buf content, only
+		 * number of total bytes sent
+		 */
+	}
+
+	ret = read(unixfd, buf2, 4);
+	assert(ret == 4);
+	assert(strncmp(buf2, "shut", 4) == 0);
+
+	wait_for_ack(fd, 5000, sent);
+
+	ret = write(fd, buf, 1);
+	assert(ret == 1);
+	close(fd);
+	ret = write(unixfd, "closed", 6);
+	assert(ret == 6);
+
+	close(unixfd);
+}
+
+static void get_tcp_inq(struct msghdr *msgh, unsigned int *inqv)
+{
+	struct cmsghdr *cmsg;
+
+	for (cmsg = CMSG_FIRSTHDR(msgh); cmsg ; cmsg = CMSG_NXTHDR(msgh, cmsg)) {
+		if (cmsg->cmsg_level == IPPROTO_TCP && cmsg->cmsg_type == TCP_CM_INQ) {
+			memcpy(inqv, CMSG_DATA(cmsg), sizeof(*inqv));
+			return;
+		}
+	}
+
+	xerror("could not find TCP_CM_INQ cmsg type");
+}
+
+static void process_one_client(int fd, int unixfd)
+{
+	unsigned int tcp_inq;
+	size_t expect_len;
+	char msg_buf[4096];
+	char buf[4096];
+	char tmp[16];
+	struct iovec iov = {
+		.iov_base = buf,
+		.iov_len = 1,
+	};
+	struct msghdr msg = {
+		.msg_iov = &iov,
+		.msg_iovlen = 1,
+		.msg_control = msg_buf,
+		.msg_controllen = sizeof(msg_buf),
+	};
+	ssize_t ret, tot;
+
+	ret = write(unixfd, "xmit", 4);
+	assert(ret == 4);
+
+	ret = read(unixfd, &expect_len, sizeof(expect_len));
+	assert(ret == (ssize_t)sizeof(expect_len));
+
+	if (expect_len > sizeof(buf))
+		xerror("expect len %zu exceeds buffer size", expect_len);
+
+	for (;;) {
+		struct timespec req;
+		unsigned int queued;
+
+		ret = ioctl(fd, FIONREAD, &queued);
+		if (ret < 0)
+			die_perror("FIONREAD");
+		if (queued > expect_len)
+			xerror("FIONREAD returned %u, but only %zu expected\n",
+			       queued, expect_len);
+		if (queued == expect_len)
+			break;
+
+		req.tv_sec = 0;
+		req.tv_nsec = 1000 * 1000ul;
+		nanosleep(&req, NULL);
+	}
+
+	/* read one byte, expect cmsg to return expected - 1 */
+	ret = recvmsg(fd, &msg, 0);
+	if (ret < 0)
+		die_perror("recvmsg");
+
+	if (msg.msg_controllen == 0)
+		xerror("msg_controllen is 0");
+
+	get_tcp_inq(&msg, &tcp_inq);
+
+	assert((size_t)tcp_inq == (expect_len - 1));
+
+	iov.iov_len = sizeof(buf);
+	ret = recvmsg(fd, &msg, 0);
+	if (ret < 0)
+		die_perror("recvmsg");
+
+	/* should have gotten exact remainder of all pending data */
+	assert(ret == (ssize_t)tcp_inq);
+
+	/* should be 0, all drained */
+	get_tcp_inq(&msg, &tcp_inq);
+	assert(tcp_inq == 0);
+
+	/* request a large swath of data. */
+	ret = write(unixfd, "huge", 4);
+	assert(ret == 4);
+
+	ret = read(unixfd, &expect_len, sizeof(expect_len));
+	assert(ret == (ssize_t)sizeof(expect_len));
+
+	/* peer should send us a few mb of data */
+	if (expect_len <= sizeof(buf))
+		xerror("expect len %zu too small\n", expect_len);
+
+	tot = 0;
+	do {
+		iov.iov_len = sizeof(buf);
+		ret = recvmsg(fd, &msg, 0);
+		if (ret < 0)
+			die_perror("recvmsg");
+
+		tot += ret;
+
+		get_tcp_inq(&msg, &tcp_inq);
+
+		if (tcp_inq > expect_len - tot)
+			xerror("inq %d, remaining %d total_len %d\n",
+			       tcp_inq, expect_len - tot, (int)expect_len);
+
+		assert(tcp_inq <= expect_len - tot);
+	} while ((size_t)tot < expect_len);
+
+	ret = write(unixfd, "shut", 4);
+	assert(ret == 4);
+
+	/* wait for hangup. Should have received one more byte of data. */
+	ret = read(unixfd, tmp, sizeof(tmp));
+	assert(ret == 6);
+	assert(strncmp(tmp, "closed", 6) == 0);
+
+	sleep(1);
+
+	iov.iov_len = 1;
+	ret = recvmsg(fd, &msg, 0);
+	if (ret < 0)
+		die_perror("recvmsg");
+	assert(ret == 1);
+
+	get_tcp_inq(&msg, &tcp_inq);
+
+	/* tcp_inq should be 1 due to received fin. */
+	assert(tcp_inq == 1);
+
+	iov.iov_len = 1;
+	ret = recvmsg(fd, &msg, 0);
+	if (ret < 0)
+		die_perror("recvmsg");
+
+	/* expect EOF */
+	assert(ret == 0);
+	get_tcp_inq(&msg, &tcp_inq);
+	assert(tcp_inq == 1);
+
+	close(fd);
+}
+
+static int xaccept(int s)
+{
+	int fd = accept(s, NULL, 0);
+
+	if (fd < 0)
+		die_perror("accept");
+
+	return fd;
+}
+
+static int server(int unixfd)
+{
+	int fd = -1, r, on = 1;
+
+	switch (pf) {
+	case AF_INET:
+		fd = sock_listen_mptcp("127.0.0.1", "15432");
+		break;
+	case AF_INET6:
+		fd = sock_listen_mptcp("::1", "15432");
+		break;
+	default:
+		xerror("Unknown pf %d\n", pf);
+		break;
+	}
+
+	r = write(unixfd, "conn", 4);
+	assert(r == 4);
+
+	alarm(15);
+	r = xaccept(fd);
+
+	if (-1 == setsockopt(r, IPPROTO_TCP, TCP_INQ, &on, sizeof(on)))
+		die_perror("setsockopt");
+
+	process_one_client(r, unixfd);
+
+	return 0;
+}
+
+static int client(int unixfd)
+{
+	int fd = -1;
+
+	alarm(15);
+
+	switch (pf) {
+	case AF_INET:
+		fd = sock_connect_mptcp("127.0.0.1", "15432", proto_tx);
+		break;
+	case AF_INET6:
+		fd = sock_connect_mptcp("::1", "15432", proto_tx);
+		break;
+	default:
+		xerror("Unknown pf %d\n", pf);
+	}
+
+	connect_one_server(fd, unixfd);
+
+	return 0;
+}
+
+static void init_rng(void)
+{
+	unsigned int foo;
+
+	if (getrandom(&foo, sizeof(foo), 0) == -1) {
+		perror("getrandom");
+		exit(1);
+	}
+
+	srand(foo);
+}
+
+static pid_t xfork(void)
+{
+	pid_t p = fork();
+
+	if (p < 0)
+		die_perror("fork");
+	else if (p == 0)
+		init_rng();
+
+	return p;
+}
+
+static int rcheck(int wstatus, const char *what)
+{
+	if (WIFEXITED(wstatus)) {
+		if (WEXITSTATUS(wstatus) == 0)
+			return 0;
+		fprintf(stderr, "%s exited, status=%d\n", what, WEXITSTATUS(wstatus));
+		return WEXITSTATUS(wstatus);
+	} else if (WIFSIGNALED(wstatus)) {
+		xerror("%s killed by signal %d\n", what, WTERMSIG(wstatus));
+	} else if (WIFSTOPPED(wstatus)) {
+		xerror("%s stopped by signal %d\n", what, WSTOPSIG(wstatus));
+	}
+
+	return 111;
+}
+
+int main(int argc, char *argv[])
+{
+	int e1, e2, wstatus;
+	pid_t s, c, ret;
+	int unixfds[2];
+
+	parse_opts(argc, argv);
+
+	e1 = socketpair(AF_UNIX, SOCK_DGRAM, 0, unixfds);
+	if (e1 < 0)
+		die_perror("pipe");
+
+	s = xfork();
+	if (s == 0)
+		return server(unixfds[1]);
+
+	close(unixfds[1]);
+
+	/* wait until server bound a socket */
+	e1 = read(unixfds[0], &e1, 4);
+	assert(e1 == 4);
+
+	c = xfork();
+	if (c == 0)
+		return client(unixfds[0]);
+
+	close(unixfds[0]);
+
+	ret = waitpid(s, &wstatus, 0);
+	if (ret == -1)
+		die_perror("waitpid");
+	e1 = rcheck(wstatus, "server");
+	ret = waitpid(c, &wstatus, 0);
+	if (ret == -1)
+		die_perror("waitpid");
+	e2 = rcheck(wstatus, "client");
+
+	return e1 ? e1 : e2;
+}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 08f53d86dedc..5e9211e89825 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -1,42 +1,111 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
+# Double quotes to prevent globbing and word splitting is recommended in new
+# code but we accept it, especially because there were too many before having
+# address all other issues detected by shellcheck.
+#shellcheck disable=SC2086
+
+# ShellCheck incorrectly believes that most of the code here is unreachable
+# because it's invoked by variable name, see how the "tests" array is used
+#shellcheck disable=SC2317
+
+. "$(dirname "${0}")/mptcp_lib.sh"
+
 ret=0
 sin=""
+sinfail=""
 sout=""
 cin=""
+cinfail=""
+cinsent=""
+tmpfile=""
 cout=""
-ksft_skip=4
-timeout=30
-mptcp_connect=""
-capture=0
-
-TEST_COUNT=0
-
-init()
+err=""
+capout=""
+ns1=""
+ns2=""
+iptables="iptables"
+ip6tables="ip6tables"
+timeout_poll=30
+timeout_test=$((timeout_poll * 2 + 1))
+capture=false
+checksum=false
+ip_mptcp=0
+check_invert=0
+validate_checksum=false
+init=0
+evts_ns1=""
+evts_ns2=""
+evts_ns1_pid=0
+evts_ns2_pid=0
+last_test_failed=0
+last_test_skipped=0
+last_test_ignored=1
+
+declare -A all_tests
+declare -a only_tests_ids
+declare -a only_tests_names
+declare -A failed_tests
+MPTCP_LIB_TEST_FORMAT="%03u %s\n"
+TEST_NAME=""
+nr_blank=6
+
+# These var are used only in some tests, make sure they are not already set
+unset FAILING_LINKS
+unset test_linkfail
+unset addr_nr_ns1
+unset addr_nr_ns2
+unset cestab_ns1
+unset cestab_ns2
+unset sflags
+unset fastclose
+unset fullmesh
+unset speed
+
+# generated using "nfbpf_compile '(ip && (ip[54] & 0xf0) == 0x30) ||
+#				  (ip6 && (ip6[74] & 0xf0) == 0x30)'"
+CBPF_MPTCP_SUBOPTION_ADD_ADDR="14,
+			       48 0 0 0,
+			       84 0 0 240,
+			       21 0 3 64,
+			       48 0 0 54,
+			       84 0 0 240,
+			       21 6 7 48,
+			       48 0 0 0,
+			       84 0 0 240,
+			       21 0 4 96,
+			       48 0 0 74,
+			       84 0 0 240,
+			       21 0 1 48,
+			       6 0 0 65535,
+			       6 0 0 0"
+
+init_partial()
 {
 	capout=$(mktemp)
 
-	rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
-
-	ns1="ns1-$rndh"
-	ns2="ns2-$rndh"
+	mptcp_lib_ns_init ns1 ns2
 
-	for netns in "$ns1" "$ns2";do
-		ip netns add $netns || exit $ksft_skip
-		ip -net $netns link set lo up
-		ip netns exec $netns sysctl -q net.mptcp.enabled=1
-		ip netns exec $netns sysctl -q net.ipv4.conf.all.rp_filter=0
-		ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0
+	local netns
+	for netns in "$ns1" "$ns2"; do
+		ip netns exec $netns sysctl -q net.mptcp.pm_type=0 2>/dev/null || true
+		if $checksum; then
+			ip netns exec $netns sysctl -q net.mptcp.checksum_enabled=1
+		fi
 	done
 
-	#  ns1              ns2
+	check_invert=0
+	validate_checksum=$checksum
+
+	#  ns1         ns2
 	# ns1eth1    ns2eth1
 	# ns1eth2    ns2eth2
 	# ns1eth3    ns2eth3
 	# ns1eth4    ns2eth4
 
-	for i in `seq 1 4`; do
+	local i
+	for i in $(seq 1 4); do
 		ip link add ns1eth$i netns "$ns1" type veth peer name ns2eth$i netns "$ns2"
 		ip -net "$ns1" addr add 10.0.$i.1/24 dev ns1eth$i
 		ip -net "$ns1" addr add dead:beef:$i::1/64 dev ns1eth$i nodad
@@ -48,6 +117,16 @@ init()
 
 		# let $ns2 reach any $ns1 address from any interface
 		ip -net "$ns2" route add default via 10.0.$i.1 dev ns2eth$i metric 10$i
+		ip -net "$ns2" route add default via dead:beef:$i::1 dev ns2eth$i metric 10$i
+	done
+}
+
+init_shapers()
+{
+	local i
+	for i in $(seq 1 4); do
+		tc -n $ns1 qdisc add dev ns1eth$i root netem rate 20mbit delay 1
+		tc -n $ns2 qdisc add dev ns2eth$i root netem rate 20mbit delay 1
 	done
 }
 
@@ -55,172 +134,986 @@ cleanup_partial()
 {
 	rm -f "$capout"
 
-	for netns in "$ns1" "$ns2"; do
-		ip netns del $netns
-	done
+	mptcp_lib_ns_exit "${ns1}" "${ns2}"
+}
+
+init() {
+	init=1
+
+	mptcp_lib_check_mptcp
+	mptcp_lib_check_kallsyms
+	mptcp_lib_check_tools ip ss "${iptables}" "${ip6tables}"
+
+	sin=$(mktemp)
+	sout=$(mktemp)
+	cin=$(mktemp)
+	cinsent=$(mktemp)
+	cout=$(mktemp)
+	err=$(mktemp)
+	evts_ns1=$(mktemp)
+	evts_ns2=$(mktemp)
+
+	trap cleanup EXIT
+
+	make_file "$cin" "client" 1 >/dev/null
+	make_file "$sin" "server" 1 >/dev/null
 }
 
 cleanup()
 {
-	rm -f "$cin" "$cout"
-	rm -f "$sin" "$sout"
+	rm -f "$cin" "$cout" "$sinfail"
+	rm -f "$sin" "$sout" "$cinsent" "$cinfail"
+	rm -f "$tmpfile"
+	rm -rf $evts_ns1 $evts_ns2
+	rm -f "$err"
 	cleanup_partial
 }
 
+print_check()
+{
+	printf "%-${nr_blank}s%-36s" " " "${*}"
+}
+
+print_info()
+{
+	# It can be empty, no need to print anything then
+	[ -z "${1}" ] && return
+
+	mptcp_lib_print_info "      Info: ${*}"
+}
+
+print_ok()
+{
+	mptcp_lib_pr_ok "${@}"
+}
+
+print_fail()
+{
+	mptcp_lib_pr_fail "${@}"
+}
+
+print_skip()
+{
+	mptcp_lib_pr_skip "${@}"
+}
+
+# [ $1: fail msg ]
+mark_as_skipped()
+{
+	local msg="${1:-"Feature not supported"}"
+
+	mptcp_lib_fail_if_expected_feature "${msg}"
+
+	print_check "${msg}"
+	print_skip
+
+	last_test_skipped=1
+}
+
+# $@: condition
+continue_if()
+{
+	if ! "${@}"; then
+		mark_as_skipped
+		return 1
+	fi
+}
+
+skip_test()
+{
+	if [ "${#only_tests_ids[@]}" -eq 0 ] && [ "${#only_tests_names[@]}" -eq 0 ]; then
+		return 1
+	fi
+
+	local i
+	for i in "${only_tests_ids[@]}"; do
+		if [ "$((MPTCP_LIB_TEST_COUNTER+1))" -eq "${i}" ]; then
+			return 1
+		fi
+	done
+	for i in "${only_tests_names[@]}"; do
+		if [ "${TEST_NAME}" = "${i}" ]; then
+			return 1
+		fi
+	done
+
+	return 0
+}
+
+append_prev_results()
+{
+	if [ ${last_test_failed} -eq 1 ]; then
+		mptcp_lib_result_fail "${TEST_NAME}"
+	elif [ ${last_test_skipped} -eq 1 ]; then
+		mptcp_lib_result_skip "${TEST_NAME}"
+	elif [ ${last_test_ignored} -ne 1 ]; then
+		mptcp_lib_result_pass "${TEST_NAME}"
+	fi
+
+	last_test_failed=0
+	last_test_skipped=0
+	last_test_ignored=0
+}
+
+# $1: test name
 reset()
 {
-	cleanup_partial
-	init
+	append_prev_results
+
+	TEST_NAME="${1}"
+
+	if skip_test; then
+		MPTCP_LIB_TEST_COUNTER=$((MPTCP_LIB_TEST_COUNTER+1))
+		last_test_ignored=1
+		return 1
+	fi
+
+	mptcp_lib_print_title "${TEST_NAME}"
+
+	if [ "${init}" != "1" ]; then
+		init
+	else
+		cleanup_partial
+	fi
+
+	init_partial
+
+	return 0
 }
 
+# $1: test name ; $2: counter to check
+reset_check_counter()
+{
+	reset "${1}" || return 1
+
+	local counter="${2}"
+
+	if ! nstat -asz "${counter}" | grep -wq "${counter}"; then
+		mark_as_skipped "counter '${counter}' is not available"
+		return 1
+	fi
+}
+
+# $1: test name
 reset_with_cookies()
 {
-	reset
+	reset "${1}" || return 1
 
-	for netns in "$ns1" "$ns2";do
+	local netns
+	for netns in "$ns1" "$ns2"; do
 		ip netns exec $netns sysctl -q net.ipv4.tcp_syncookies=2
 	done
 }
 
-for arg in "$@"; do
-	if [ "$arg" = "-c" ]; then
-		capture=1
+# $1: test name
+reset_with_add_addr_timeout()
+{
+	local ip="${2:-4}"
+	local tables
+
+	reset "${1}" || return 1
+
+	tables="${iptables}"
+	if [ $ip -eq 6 ]; then
+		tables="${ip6tables}"
 	fi
-done
 
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
-	echo "SKIP: Could not run test without ip tool"
-	exit $ksft_skip
-fi
+	ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1
 
+	if ! ip netns exec $ns2 $tables -A OUTPUT -p tcp \
+			-m tcp --tcp-option 30 \
+			-m bpf --bytecode \
+			"$CBPF_MPTCP_SUBOPTION_ADD_ADDR" \
+			-j DROP; then
+		mark_as_skipped "unable to set the 'add addr' rule"
+		return 1
+	fi
+}
 
-check_transfer()
+# $1: test name
+reset_with_checksum()
+{
+	local ns1_enable=$1
+	local ns2_enable=$2
+
+	reset "checksum test ${1} ${2}" || return 1
+
+	ip netns exec $ns1 sysctl -q net.mptcp.checksum_enabled=$ns1_enable
+	ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=$ns2_enable
+
+	validate_checksum=true
+}
+
+reset_with_allow_join_id0()
+{
+	local ns1_enable=$2
+	local ns2_enable=$3
+
+	reset "${1}" || return 1
+
+	ip netns exec $ns1 sysctl -q net.mptcp.allow_join_initial_addr_port=$ns1_enable
+	ip netns exec $ns2 sysctl -q net.mptcp.allow_join_initial_addr_port=$ns2_enable
+}
+
+# Modify TCP payload without corrupting the TCP packet
+#
+# This rule inverts a 8-bit word at byte offset 148 for the 2nd TCP ACK packets
+# carrying enough data.
+# Once it is done, the TCP Checksum field is updated so the packet is still
+# considered as valid at the TCP level.
+# Because the MPTCP checksum, covering the TCP options and data, has not been
+# updated, the modification will be detected and an MP_FAIL will be emitted:
+# what we want to validate here without corrupting "random" MPTCP options.
+#
+# To avoid having tc producing this pr_info() message for each TCP ACK packets
+# not carrying enough data:
+#
+#     tc action pedit offset 162 out of bounds
+#
+# Netfilter is used to mark packets with enough data.
+setup_fail_rules()
+{
+	check_invert=1
+	validate_checksum=true
+	local i="$1"
+	local ip="${2:-4}"
+	local tables
+
+	tables="${iptables}"
+	if [ $ip -eq 6 ]; then
+		tables="${ip6tables}"
+	fi
+
+	ip netns exec $ns2 $tables \
+		-t mangle \
+		-A OUTPUT \
+		-o ns2eth$i \
+		-p tcp \
+		-m length --length 150:9999 \
+		-m statistic --mode nth --packet 1 --every 99999 \
+		-j MARK --set-mark 42 || return ${KSFT_SKIP}
+
+	tc -n $ns2 qdisc add dev ns2eth$i clsact || return ${KSFT_SKIP}
+	tc -n $ns2 filter add dev ns2eth$i egress \
+		protocol ip prio 1000 \
+		handle 42 fw \
+		action pedit munge offset 148 u8 invert \
+		pipe csum tcp \
+		index 100 || return ${KSFT_SKIP}
+}
+
+reset_with_fail()
+{
+	reset_check_counter "${1}" "MPTcpExtInfiniteMapTx" || return 1
+	shift
+
+	ip netns exec $ns1 sysctl -q net.mptcp.checksum_enabled=1
+	ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=1
+
+	local rc=0
+	setup_fail_rules "${@}" || rc=$?
+
+	if [ ${rc} -eq ${KSFT_SKIP} ]; then
+		mark_as_skipped "unable to set the 'fail' rules"
+		return 1
+	fi
+}
+
+reset_with_events()
 {
-	in=$1
-	out=$2
-	what=$3
+	reset "${1}" || return 1
 
-	cmp "$in" "$out" > /dev/null 2>&1
-	if [ $? -ne 0 ] ;then
-		echo "[ FAIL ] $what does not match (in, out):"
-		print_file_err "$in"
-		print_file_err "$out"
+	mptcp_lib_events "${ns1}" "${evts_ns1}" evts_ns1_pid
+	mptcp_lib_events "${ns2}" "${evts_ns2}" evts_ns2_pid
+}
 
+reset_with_tcp_filter()
+{
+	reset "${1}" || return 1
+	shift
+
+	local ns="${!1}"
+	local src="${2}"
+	local target="${3}"
+
+	if ! ip netns exec "${ns}" ${iptables} \
+			-A INPUT \
+			-s "${src}" \
+			-p tcp \
+			-j "${target}"; then
+		mark_as_skipped "unable to set the filter rules"
 		return 1
 	fi
+}
+
+# $1: err msg
+fail_test()
+{
+	ret=${KSFT_FAIL}
+
+	if [ ${#} -gt 0 ]; then
+		print_fail "${@}"
+	fi
+
+	# just in case a test is marked twice as failed
+	if [ ${last_test_failed} -eq 0 ]; then
+		failed_tests[${MPTCP_LIB_TEST_COUNTER}]="${TEST_NAME}"
+		dump_stats
+		last_test_failed=1
+	fi
+}
+
+get_failed_tests_ids()
+{
+	# sorted
+	local i
+	for i in "${!failed_tests[@]}"; do
+		echo "${i}"
+	done | sort -n
+}
+
+check_transfer()
+{
+	local in=$1
+	local out=$2
+	local what=$3
+	local bytes=$4
+	local i a b
+
+	local line
+	if [ -n "$bytes" ]; then
+		local out_size
+		# when truncating we must check the size explicitly
+		out_size=$(wc -c $out | awk '{print $1}')
+		if [ $out_size -ne $bytes ]; then
+			fail_test "$what output file has wrong size ($out_size, $bytes)"
+			return 1
+		fi
+
+		# note: BusyBox's "cmp" command doesn't support --bytes
+		tmpfile=$(mktemp)
+		head --bytes="$bytes" "$in" > "$tmpfile"
+		mv "$tmpfile" "$in"
+		head --bytes="$bytes" "$out" > "$tmpfile"
+		mv "$tmpfile" "$out"
+		tmpfile=""
+	fi
+	cmp -l "$in" "$out" | while read -r i a b; do
+		local sum=$((0${a} + 0${b}))
+		if [ $check_invert -eq 0 ] || [ $sum -ne $((0xff)) ]; then
+			fail_test "$what does not match (in, out):"
+			mptcp_lib_print_file_err "$in"
+			mptcp_lib_print_file_err "$out"
+
+			return 1
+		else
+			print_info "$what has inverted byte at ${i}"
+		fi
+	done
 
 	return 0
 }
 
 do_ping()
 {
-	listener_ns="$1"
-	connector_ns="$2"
-	connect_addr="$3"
+	local listener_ns="$1"
+	local connector_ns="$2"
+	local connect_addr="$3"
 
-	ip netns exec ${connector_ns} ping -q -c 1 $connect_addr >/dev/null
-	if [ $? -ne 0 ] ; then
-		echo "$listener_ns -> $connect_addr connectivity [ FAIL ]" 1>&2
-		ret=1
+	if ! ip netns exec ${connector_ns} ping -q -c 1 $connect_addr >/dev/null; then
+		fail_test "$listener_ns -> $connect_addr connectivity"
 	fi
 }
 
-do_transfer()
+link_failure()
 {
-	listener_ns="$1"
-	connector_ns="$2"
-	cl_proto="$3"
-	srv_proto="$4"
-	connect_addr="$5"
-	rm_nr_ns1="$6"
-	rm_nr_ns2="$7"
+	local ns="$1"
 
-	port=$((10000+$TEST_COUNT))
-	TEST_COUNT=$((TEST_COUNT+1))
+	if [ -z "$FAILING_LINKS" ]; then
+		l=$((RANDOM%4))
+		FAILING_LINKS=$((l+1))
+	fi
+
+	local l
+	for l in $FAILING_LINKS; do
+		local veth="ns1eth$l"
+		ip -net "$ns" link set "$veth" down
+	done
+}
+
+rm_addr_count()
+{
+	mptcp_lib_get_counter "${1}" "MPTcpExtRmAddr"
+}
+
+# $1: ns, $2: old rm_addr counter in $ns
+wait_rm_addr()
+{
+	local ns="${1}"
+	local old_cnt="${2}"
+	local cnt
+
+	local i
+	for i in $(seq 10); do
+		cnt=$(rm_addr_count ${ns})
+		[ "$cnt" = "${old_cnt}" ] || break
+		sleep 0.1
+	done
+}
+
+rm_sf_count()
+{
+	mptcp_lib_get_counter "${1}" "MPTcpExtRmSubflow"
+}
+
+# $1: ns, $2: old rm_sf counter in $ns
+wait_rm_sf()
+{
+	local ns="${1}"
+	local old_cnt="${2}"
+	local cnt
+
+	local i
+	for i in $(seq 10); do
+		cnt=$(rm_sf_count ${ns})
+		[ "$cnt" = "${old_cnt}" ] || break
+		sleep 0.1
+	done
+}
+
+wait_mpj()
+{
+	local ns="${1}"
+	local cnt old_cnt
+
+	old_cnt=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPJoinAckRx")
+
+	local i
+	for i in $(seq 10); do
+		cnt=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPJoinAckRx")
+		[ "$cnt" = "${old_cnt}" ] || break
+		sleep 0.1
+	done
+}
+
+kill_events_pids()
+{
+	mptcp_lib_kill_wait $evts_ns1_pid
+	evts_ns1_pid=0
+	mptcp_lib_kill_wait $evts_ns2_pid
+	evts_ns2_pid=0
+}
+
+pm_nl_set_limits()
+{
+	local ns=$1
+	local addrs=$2
+	local subflows=$3
+
+	if [ $ip_mptcp -eq 1 ]; then
+		ip -n $ns mptcp limits set add_addr_accepted $addrs subflows $subflows
+	else
+		ip netns exec $ns ./pm_nl_ctl limits $addrs $subflows
+	fi
+}
+
+pm_nl_add_endpoint()
+{
+	local ns=$1
+	local addr=$2
+	local flags _flags
+	local port _port
+	local dev _dev
+	local id _id
+	local nr=2
+
+	local p
+	for p in "${@}"
+	do
+		if [ $p = "flags" ]; then
+			eval _flags=\$"$nr"
+			[ -n "$_flags" ]; flags="flags $_flags"
+		fi
+		if [ $p = "dev" ]; then
+			eval _dev=\$"$nr"
+			[ -n "$_dev" ]; dev="dev $_dev"
+		fi
+		if [ $p = "id" ]; then
+			eval _id=\$"$nr"
+			[ -n "$_id" ]; id="id $_id"
+		fi
+		if [ $p = "port" ]; then
+			eval _port=\$"$nr"
+			[ -n "$_port" ]; port="port $_port"
+		fi
+
+		nr=$((nr + 1))
+	done
+
+	if [ $ip_mptcp -eq 1 ]; then
+		ip -n $ns mptcp endpoint add $addr ${_flags//","/" "} $dev $id $port
+	else
+		ip netns exec $ns ./pm_nl_ctl add $addr $flags $dev $id $port
+	fi
+}
+
+pm_nl_del_endpoint()
+{
+	local ns=$1
+	local id=$2
+	local addr=$3
+
+	if [ $ip_mptcp -eq 1 ]; then
+		[ $id -ne 0 ] && addr=''
+		ip -n $ns mptcp endpoint delete id $id $addr
+	else
+		ip netns exec $ns ./pm_nl_ctl del $id $addr
+	fi
+}
+
+pm_nl_flush_endpoint()
+{
+	local ns=$1
+
+	if [ $ip_mptcp -eq 1 ]; then
+		ip -n $ns mptcp endpoint flush
+	else
+		ip netns exec $ns ./pm_nl_ctl flush
+	fi
+}
+
+pm_nl_show_endpoints()
+{
+	local ns=$1
+
+	if [ $ip_mptcp -eq 1 ]; then
+		ip -n $ns mptcp endpoint show
+	else
+		ip netns exec $ns ./pm_nl_ctl dump
+	fi
+}
+
+pm_nl_change_endpoint()
+{
+	local ns=$1
+	local id=$2
+	local flags=$3
+
+	if [ $ip_mptcp -eq 1 ]; then
+		ip -n $ns mptcp endpoint change id $id ${flags//","/" "}
+	else
+		ip netns exec $ns ./pm_nl_ctl set id $id flags $flags
+	fi
+}
+
+pm_nl_check_endpoint()
+{
+	local line expected_line
+	local msg="$1"
+	local ns=$2
+	local addr=$3
+	local _flags=""
+	local flags
+	local _port
+	local port
+	local dev
+	local _id
+	local id
+
+	print_check "${msg}"
+
+	shift 3
+	while [ -n "$1" ]; do
+		if [ $1 = "flags" ]; then
+			_flags=$2
+			[ -n "$_flags" ]; flags="flags $_flags"
+			shift
+		elif [ $1 = "dev" ]; then
+			[ -n "$2" ]; dev="dev $1"
+			shift
+		elif [ $1 = "id" ]; then
+			_id=$2
+			[ -n "$_id" ]; id="id $_id"
+			shift
+		elif [ $1 = "port" ]; then
+			_port=$2
+			[ -n "$_port" ]; port=" port $_port"
+			shift
+		fi
+
+		shift
+	done
+
+	if [ -z "$id" ]; then
+		test_fail "bad test - missing endpoint id"
+		return
+	fi
+
+	if [ $ip_mptcp -eq 1 ]; then
+		# get line and trim trailing whitespace
+		line=$(ip -n $ns mptcp endpoint show $id)
+		line="${line% }"
+		# the dump order is: address id flags port dev
+		[ -n "$addr" ] && expected_line="$addr"
+		expected_line+=" $id"
+		[ -n "$_flags" ] && expected_line+=" ${_flags//","/" "}"
+		[ -n "$dev" ] && expected_line+=" $dev"
+		[ -n "$port" ] && expected_line+=" $port"
+	else
+		line=$(ip netns exec $ns ./pm_nl_ctl get $_id)
+		# the dump order is: id flags dev address port
+		expected_line="$id"
+		[ -n "$flags" ] && expected_line+=" $flags"
+		[ -n "$dev" ] && expected_line+=" $dev"
+		[ -n "$addr" ] && expected_line+=" $addr"
+		[ -n "$_port" ] && expected_line+=" $_port"
+	fi
+	if [ "$line" = "$expected_line" ]; then
+		print_ok
+	else
+		fail_test "expected '$expected_line' found '$line'"
+	fi
+}
+
+pm_nl_set_endpoint()
+{
+	local listener_ns="$1"
+	local connector_ns="$2"
+	local connect_addr="$3"
+
+	local addr_nr_ns1=${addr_nr_ns1:-0}
+	local addr_nr_ns2=${addr_nr_ns2:-0}
+	local sflags=${sflags:-""}
+	local fullmesh=${fullmesh:-""}
+
+	local flags="subflow"
+	if [ -n "${fullmesh}" ]; then
+		flags="${flags},fullmesh"
+		addr_nr_ns2=${fullmesh}
+	fi
+
+	# let the mptcp subflow be established in background before
+	# do endpoint manipulation
+	if [ $addr_nr_ns1 != "0" ] || [ $addr_nr_ns2 != "0" ]; then
+		sleep 1
+	fi
+
+	if [ $addr_nr_ns1 -gt 0 ]; then
+		local counter=2
+		local add_nr_ns1=${addr_nr_ns1}
+		local id=10
+		while [ $add_nr_ns1 -gt 0 ]; do
+			local addr
+			if mptcp_lib_is_v6 "${connect_addr}"; then
+				addr="dead:beef:$counter::1"
+			else
+				addr="10.0.$counter.1"
+			fi
+			pm_nl_add_endpoint $ns1 $addr flags signal
+			counter=$((counter + 1))
+			add_nr_ns1=$((add_nr_ns1 - 1))
+			id=$((id + 1))
+		done
+	elif [ $addr_nr_ns1 -lt 0 ]; then
+		local rm_nr_ns1=$((-addr_nr_ns1))
+		if [ $rm_nr_ns1 -lt 8 ]; then
+			local counter=0
+			local line
+			pm_nl_show_endpoints ${listener_ns} | while read -r line; do
+				# shellcheck disable=SC2206 # we do want to split per word
+				local arr=($line)
+				local nr=0
+
+				local i
+				for i in "${arr[@]}"; do
+					if [ $i = "id" ]; then
+						if [ $counter -eq $rm_nr_ns1 ]; then
+							break
+						fi
+						id=${arr[$nr+1]}
+						rm_addr=$(rm_addr_count ${connector_ns})
+						pm_nl_del_endpoint ${listener_ns} $id
+						wait_rm_addr ${connector_ns} ${rm_addr}
+						counter=$((counter + 1))
+					fi
+					nr=$((nr + 1))
+				done
+			done
+		elif [ $rm_nr_ns1 -eq 8 ]; then
+			pm_nl_flush_endpoint ${listener_ns}
+		elif [ $rm_nr_ns1 -eq 9 ]; then
+			pm_nl_del_endpoint ${listener_ns} 0 ${connect_addr}
+		fi
+	fi
+
+	# if newly added endpoints must be deleted, give the background msk
+	# some time to created them
+	[ $addr_nr_ns1 -gt 0 ] && [ $addr_nr_ns2 -lt 0 ] && sleep 1
+
+	if [ $addr_nr_ns2 -gt 0 ]; then
+		local add_nr_ns2=${addr_nr_ns2}
+		local counter=3
+		local id=20
+		while [ $add_nr_ns2 -gt 0 ]; do
+			local addr
+			if mptcp_lib_is_v6 "${connect_addr}"; then
+				addr="dead:beef:$counter::2"
+			else
+				addr="10.0.$counter.2"
+			fi
+			pm_nl_add_endpoint $ns2 $addr flags $flags
+			counter=$((counter + 1))
+			add_nr_ns2=$((add_nr_ns2 - 1))
+			id=$((id + 1))
+		done
+	elif [ $addr_nr_ns2 -lt 0 ]; then
+		local rm_nr_ns2=$((-addr_nr_ns2))
+		if [ $rm_nr_ns2 -lt 8 ]; then
+			local counter=0
+			local line
+			pm_nl_show_endpoints ${connector_ns} | while read -r line; do
+				# shellcheck disable=SC2206 # we do want to split per word
+				local arr=($line)
+				local nr=0
+
+				local i
+				for i in "${arr[@]}"; do
+					if [ $i = "id" ]; then
+						if [ $counter -eq $rm_nr_ns2 ]; then
+							break
+						fi
+						local id rm_addr
+						# rm_addr are serialized, allow the previous one to
+						# complete
+						id=${arr[$nr+1]}
+						rm_addr=$(rm_addr_count ${listener_ns})
+						pm_nl_del_endpoint ${connector_ns} $id
+						wait_rm_addr ${listener_ns} ${rm_addr}
+						counter=$((counter + 1))
+					fi
+					nr=$((nr + 1))
+				done
+			done
+		elif [ $rm_nr_ns2 -eq 8 ]; then
+			pm_nl_flush_endpoint ${connector_ns}
+		elif [ $rm_nr_ns2 -eq 9 ]; then
+			local addr
+			if mptcp_lib_is_v6 "${connect_addr}"; then
+				addr="dead:beef:1::2"
+			else
+				addr="10.0.1.2"
+			fi
+			pm_nl_del_endpoint ${connector_ns} 0 $addr
+		fi
+	fi
+
+	if [ -n "${sflags}" ]; then
+		sleep 1
+
+		local netns
+		for netns in "$ns1" "$ns2"; do
+			local line
+			pm_nl_show_endpoints $netns | while read -r line; do
+				# shellcheck disable=SC2206 # we do want to split per word
+				local arr=($line)
+				local nr=0
+				local id
+
+				local i
+				for i in "${arr[@]}"; do
+					if [ $i = "id" ]; then
+						id=${arr[$nr+1]}
+					fi
+					nr=$((nr + 1))
+				done
+				pm_nl_change_endpoint $netns $id $sflags
+			done
+		done
+	fi
+}
+
+chk_cestab_nr()
+{
+	local ns=$1
+	local cestab=$2
+	local count
+
+	print_check "cestab $cestab"
+	count=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPCurrEstab")
+	if [ -z "$count" ]; then
+		print_skip
+	elif [ "$count" != "$cestab" ]; then
+		fail_test "got $count current establish[s] expected $cestab"
+	else
+		print_ok
+	fi
+}
+
+# $1 namespace 1, $2 namespace 2
+check_cestab()
+{
+	if [ -n "${cestab_ns1}" ]; then
+		chk_cestab_nr ${1} ${cestab_ns1}
+	fi
+	if [ -n "${cestab_ns2}" ]; then
+		chk_cestab_nr ${2} ${cestab_ns2}
+	fi
+}
+
+do_transfer()
+{
+	local listener_ns="$1"
+	local connector_ns="$2"
+	local cl_proto="$3"
+	local srv_proto="$4"
+	local connect_addr="$5"
+
+	local port=$((10000 + MPTCP_LIB_TEST_COUNTER - 1))
+	local cappid
+	local FAILING_LINKS=${FAILING_LINKS:-""}
+	local fastclose=${fastclose:-""}
+	local speed=${speed:-"fast"}
 
 	:> "$cout"
 	:> "$sout"
 	:> "$capout"
 
-	if [ $capture -eq 1 ]; then
+	if $capture; then
+		local capuser
 		if [ -z $SUDO_USER ] ; then
 			capuser=""
 		else
 			capuser="-Z $SUDO_USER"
 		fi
 
-		capfile=$(printf "mp_join-%02u-%s.pcap" "$TEST_COUNT" "${listener_ns}")
+		capfile=$(printf "mp_join-%02u-%s.pcap" "$MPTCP_LIB_TEST_COUNTER" "${listener_ns}")
 
-		echo "Capturing traffic for test $TEST_COUNT into $capfile"
+		echo "Capturing traffic for test $MPTCP_LIB_TEST_COUNTER into $capfile"
 		ip netns exec ${listener_ns} tcpdump -i any -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 &
 		cappid=$!
 
 		sleep 1
 	fi
 
-	if [[ $rm_nr_ns1 -eq 0 && $rm_nr_ns2 -eq 0 ]]; then
-		mptcp_connect="./mptcp_connect -j"
-	else
-		mptcp_connect="./mptcp_connect -r"
+	NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
+		nstat -n
+	NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
+		nstat -n
+
+	local extra_args
+	if [ $speed = "fast" ]; then
+		extra_args="-j"
+	elif [ $speed = "slow" ]; then
+		extra_args="-r 50"
+	elif [ $speed -gt 0 ]; then
+		extra_args="-r ${speed}"
 	fi
 
-	ip netns exec ${listener_ns} $mptcp_connect -t $timeout -l -p $port -s ${srv_proto} 0.0.0.0 < "$sin" > "$sout" &
-	spid=$!
-
-	sleep 1
-
-	ip netns exec ${connector_ns} $mptcp_connect -t $timeout -p $port -s ${cl_proto} $connect_addr < "$cin" > "$cout" &
-	cpid=$!
+	local extra_cl_args=""
+	local extra_srv_args=""
+	local trunc_size=""
+	if [ -n "${fastclose}" ]; then
+		if [ ${test_linkfail} -le 1 ]; then
+			fail_test "fastclose tests need test_linkfail argument"
+			return 1
+		fi
 
-	if [ $rm_nr_ns1 -gt 0 ]; then
-		counter=1
-		sleep 1
+		# disconnect
+		trunc_size=${test_linkfail}
+		local side=${fastclose}
 
-		while [ $counter -le $rm_nr_ns1 ]
-		do
-			ip netns exec ${listener_ns} ./pm_nl_ctl del $counter
-			sleep 1
-			let counter+=1
-		done
+		if [ ${side} = "client" ]; then
+			extra_cl_args="-f ${test_linkfail}"
+			extra_srv_args="-f -1"
+		elif [ ${side} = "server" ]; then
+			extra_srv_args="-f ${test_linkfail}"
+			extra_cl_args="-f -1"
+		else
+			fail_test "wrong/unknown fastclose spec ${side}"
+			return 1
+		fi
 	fi
 
-	if [ $rm_nr_ns2 -gt 0 ]; then
-		counter=1
-		sleep 1
-
-		while [ $counter -le $rm_nr_ns2 ]
-		do
-			ip netns exec ${connector_ns} ./pm_nl_ctl del $counter
-			sleep 1
-			let counter+=1
-		done
+	extra_srv_args="$extra_args $extra_srv_args"
+	if [ "$test_linkfail" -gt 1 ];then
+		timeout ${timeout_test} \
+			ip netns exec ${listener_ns} \
+				./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
+					$extra_srv_args "::" < "$sinfail" > "$sout" &
+	else
+		timeout ${timeout_test} \
+			ip netns exec ${listener_ns} \
+				./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
+					$extra_srv_args "::" < "$sin" > "$sout" &
+	fi
+	local spid=$!
+
+	mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}"
+
+	extra_cl_args="$extra_args $extra_cl_args"
+	if [ "$test_linkfail" -eq 0 ];then
+		timeout ${timeout_test} \
+			ip netns exec ${connector_ns} \
+				./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
+					$extra_cl_args $connect_addr < "$cin" > "$cout" &
+	elif [ "$test_linkfail" -eq 1 ] || [ "$test_linkfail" -eq 2 ];then
+		( cat "$cinfail" ; sleep 2; link_failure $listener_ns ; cat "$cinfail" ) | \
+			tee "$cinsent" | \
+			timeout ${timeout_test} \
+				ip netns exec ${connector_ns} \
+					./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
+						$extra_cl_args $connect_addr > "$cout" &
+	else
+		tee "$cinsent" < "$cinfail" | \
+			timeout ${timeout_test} \
+				ip netns exec ${connector_ns} \
+					./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
+						$extra_cl_args $connect_addr > "$cout" &
 	fi
+	local cpid=$!
+
+	pm_nl_set_endpoint $listener_ns $connector_ns $connect_addr
+	check_cestab $listener_ns $connector_ns
 
 	wait $cpid
-	retc=$?
+	local retc=$?
 	wait $spid
-	rets=$?
+	local rets=$?
 
-	if [ $capture -eq 1 ]; then
+	if $capture; then
 	    sleep 1
 	    kill $cappid
 	fi
 
+	NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
+		nstat | grep Tcp > /tmp/${listener_ns}.out
+	NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
+		nstat | grep Tcp > /tmp/${connector_ns}.out
+
 	if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
-		echo " client exit code $retc, server $rets" 1>&2
+		fail_test "client exit code $retc, server $rets"
 		echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2
-		ip netns exec ${listener_ns} ss -nita 1>&2 -o "sport = :$port"
+		ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port"
+		cat /tmp/${listener_ns}.out
 		echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2
-		ip netns exec ${connector_ns} ss -nita 1>&2 -o "dport = :$port"
+		ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port"
+		cat /tmp/${connector_ns}.out
 
 		cat "$capout"
 		return 1
 	fi
 
-	check_transfer $sin $cout "file received by client"
+	if [ "$test_linkfail" -gt 1 ];then
+		check_transfer $sinfail $cout "file received by client" $trunc_size
+	else
+		check_transfer $sin $cout "file received by client" $trunc_size
+	fi
 	retc=$?
-	check_transfer $cin $sout "file received by server"
+	if [ "$test_linkfail" -eq 0 ];then
+		check_transfer $cin $sout "file received by server" $trunc_size
+	else
+		check_transfer $cinsent $sout "file received by server" $trunc_size
+	fi
 	rets=$?
 
 	if [ $retc -eq 0 ] && [ $rets -eq 0 ];then
@@ -234,95 +1127,391 @@ do_transfer()
 
 make_file()
 {
-	name=$1
-	who=$2
-
-	SIZE=1
+	local name=$1
+	local who=$2
+	local size=$3
 
-	dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null
-	echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name"
+	mptcp_lib_make_file $name 1024 $size
 
-	echo "Created $name (size $SIZE KB) containing data sent by $who"
+	print_info "Test file (size $size KB) for $who"
 }
 
 run_tests()
 {
-	listener_ns="$1"
-	connector_ns="$2"
-	connect_addr="$3"
-	lret=0
+	local listener_ns="$1"
+	local connector_ns="$2"
+	local connect_addr="$3"
 
-	do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} 0 0
-	lret=$?
-	if [ $lret -ne 0 ]; then
-		ret=$lret
-		return
+	local size
+	local test_linkfail=${test_linkfail:-0}
+
+	# The values above 2 are reused to make test files
+	# with the given sizes (KB)
+	if [ "$test_linkfail" -gt 2 ]; then
+		size=$test_linkfail
+
+		if [ -z "$cinfail" ]; then
+			cinfail=$(mktemp)
+		fi
+		make_file "$cinfail" "client" $size
+	# create the input file for the failure test when
+	# the first failure test run
+	elif [ "$test_linkfail" -ne 0 ] && [ -z "$cinfail" ]; then
+		# the client file must be considerably larger
+		# of the maximum expected cwin value, or the
+		# link utilization will be not predicable
+		size=$((RANDOM%2))
+		size=$((size+1))
+		size=$((size*8192))
+		size=$((size + ( RANDOM % 8192) ))
+
+		cinfail=$(mktemp)
+		make_file "$cinfail" "client" $size
 	fi
+
+	if [ "$test_linkfail" -gt 2 ]; then
+		size=$test_linkfail
+
+		if [ -z "$sinfail" ]; then
+			sinfail=$(mktemp)
+		fi
+		make_file "$sinfail" "server" $size
+	elif [ "$test_linkfail" -eq 2 ] && [ -z "$sinfail" ]; then
+		size=$((RANDOM%16))
+		size=$((size+1))
+		size=$((size*2048))
+
+		sinfail=$(mktemp)
+		make_file "$sinfail" "server" $size
+	fi
+
+	do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr}
 }
 
-run_remove_tests()
+dump_stats()
 {
-	listener_ns="$1"
-	connector_ns="$2"
-	connect_addr="$3"
-	rm_nr_ns1="$4"
-	rm_nr_ns2="$5"
-	lret=0
+	echo Server ns stats
+	ip netns exec $ns1 nstat -as | grep Tcp
+	echo Client ns stats
+	ip netns exec $ns2 nstat -as | grep Tcp
+}
 
-	do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} ${rm_nr_ns1} ${rm_nr_ns2}
-	lret=$?
-	if [ $lret -ne 0 ]; then
-		ret=$lret
-		return
+chk_csum_nr()
+{
+	local csum_ns1=${1:-0}
+	local csum_ns2=${2:-0}
+	local count
+	local extra_msg=""
+	local allow_multi_errors_ns1=0
+	local allow_multi_errors_ns2=0
+
+	if [[ "${csum_ns1}" = "+"* ]]; then
+		allow_multi_errors_ns1=1
+		csum_ns1=${csum_ns1:1}
+	fi
+	if [[ "${csum_ns2}" = "+"* ]]; then
+		allow_multi_errors_ns2=1
+		csum_ns2=${csum_ns2:1}
+	fi
+
+	print_check "sum"
+	count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtDataCsumErr")
+	if [ "$count" != "$csum_ns1" ]; then
+		extra_msg+=" ns1=$count"
+	fi
+	if [ -z "$count" ]; then
+		print_skip
+	elif { [ "$count" != $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 0 ]; } ||
+	   { [ "$count" -lt $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 1 ]; }; then
+		fail_test "got $count data checksum error[s] expected $csum_ns1"
+	else
+		print_ok
+	fi
+	print_check "csum"
+	count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtDataCsumErr")
+	if [ "$count" != "$csum_ns2" ]; then
+		extra_msg+=" ns2=$count"
+	fi
+	if [ -z "$count" ]; then
+		print_skip
+	elif { [ "$count" != $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 0 ]; } ||
+	   { [ "$count" -lt $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 1 ]; }; then
+		fail_test "got $count data checksum error[s] expected $csum_ns2"
+	else
+		print_ok
+	fi
+
+	print_info "$extra_msg"
+}
+
+chk_fail_nr()
+{
+	local fail_tx=$1
+	local fail_rx=$2
+	local ns_invert=${3:-""}
+	local count
+	local ns_tx=$ns1
+	local ns_rx=$ns2
+	local extra_msg=""
+	local allow_tx_lost=0
+	local allow_rx_lost=0
+
+	if [[ $ns_invert = "invert" ]]; then
+		ns_tx=$ns2
+		ns_rx=$ns1
+		extra_msg="invert"
+	fi
+
+	if [[ "${fail_tx}" = "-"* ]]; then
+		allow_tx_lost=1
+		fail_tx=${fail_tx:1}
+	fi
+	if [[ "${fail_rx}" = "-"* ]]; then
+		allow_rx_lost=1
+		fail_rx=${fail_rx:1}
+	fi
+
+	print_check "ftx"
+	count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPFailTx")
+	if [ "$count" != "$fail_tx" ]; then
+		extra_msg+=",tx=$count"
+	fi
+	if [ -z "$count" ]; then
+		print_skip
+	elif { [ "$count" != "$fail_tx" ] && [ $allow_tx_lost -eq 0 ]; } ||
+	   { [ "$count" -gt "$fail_tx" ] && [ $allow_tx_lost -eq 1 ]; }; then
+		fail_test "got $count MP_FAIL[s] TX expected $fail_tx"
+	else
+		print_ok
+	fi
+
+	print_check "failrx"
+	count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPFailRx")
+	if [ "$count" != "$fail_rx" ]; then
+		extra_msg+=",rx=$count"
+	fi
+	if [ -z "$count" ]; then
+		print_skip
+	elif { [ "$count" != "$fail_rx" ] && [ $allow_rx_lost -eq 0 ]; } ||
+	   { [ "$count" -gt "$fail_rx" ] && [ $allow_rx_lost -eq 1 ]; }; then
+		fail_test "got $count MP_FAIL[s] RX expected $fail_rx"
+	else
+		print_ok
+	fi
+
+	print_info "$extra_msg"
+}
+
+chk_fclose_nr()
+{
+	local fclose_tx=$1
+	local fclose_rx=$2
+	local ns_invert=$3
+	local count
+	local ns_tx=$ns2
+	local ns_rx=$ns1
+	local extra_msg=""
+
+	if [[ $ns_invert = "invert" ]]; then
+		ns_tx=$ns1
+		ns_rx=$ns2
+		extra_msg="invert"
+	fi
+
+	print_check "ctx"
+	count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPFastcloseTx")
+	if [ -z "$count" ]; then
+		print_skip
+	elif [ "$count" != "$fclose_tx" ]; then
+		extra_msg+=",tx=$count"
+		fail_test "got $count MP_FASTCLOSE[s] TX expected $fclose_tx"
+	else
+		print_ok
+	fi
+
+	print_check "fclzrx"
+	count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPFastcloseRx")
+	if [ -z "$count" ]; then
+		print_skip
+	elif [ "$count" != "$fclose_rx" ]; then
+		extra_msg+=",rx=$count"
+		fail_test "got $count MP_FASTCLOSE[s] RX expected $fclose_rx"
+	else
+		print_ok
+	fi
+
+	print_info "$extra_msg"
+}
+
+chk_rst_nr()
+{
+	local rst_tx=$1
+	local rst_rx=$2
+	local ns_invert=${3:-""}
+	local count
+	local ns_tx=$ns1
+	local ns_rx=$ns2
+	local extra_msg=""
+
+	if [[ $ns_invert = "invert" ]]; then
+		ns_tx=$ns2
+		ns_rx=$ns1
+		extra_msg="invert"
+	fi
+
+	print_check "rtx"
+	count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPRstTx")
+	if [ -z "$count" ]; then
+		print_skip
+	# accept more rst than expected except if we don't expect any
+	elif { [ $rst_tx -ne 0 ] && [ $count -lt $rst_tx ]; } ||
+	     { [ $rst_tx -eq 0 ] && [ $count -ne 0 ]; }; then
+		fail_test "got $count MP_RST[s] TX expected $rst_tx"
+	else
+		print_ok
+	fi
+
+	print_check "rstrx"
+	count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPRstRx")
+	if [ -z "$count" ]; then
+		print_skip
+	# accept more rst than expected except if we don't expect any
+	elif { [ $rst_rx -ne 0 ] && [ $count -lt $rst_rx ]; } ||
+	     { [ $rst_rx -eq 0 ] && [ $count -ne 0 ]; }; then
+		fail_test "got $count MP_RST[s] RX expected $rst_rx"
+	else
+		print_ok
+	fi
+
+	print_info "$extra_msg"
+}
+
+chk_infi_nr()
+{
+	local infi_tx=$1
+	local infi_rx=$2
+	local count
+
+	print_check "itx"
+	count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtInfiniteMapTx")
+	if [ -z "$count" ]; then
+		print_skip
+	elif [ "$count" != "$infi_tx" ]; then
+		fail_test "got $count infinite map[s] TX expected $infi_tx"
+	else
+		print_ok
+	fi
+
+	print_check "infirx"
+	count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtInfiniteMapRx")
+	if [ -z "$count" ]; then
+		print_skip
+	elif [ "$count" != "$infi_rx" ]; then
+		fail_test "got $count infinite map[s] RX expected $infi_rx"
+	else
+		print_ok
 	fi
 }
 
 chk_join_nr()
 {
-	local msg="$1"
-	local syn_nr=$2
-	local syn_ack_nr=$3
-	local ack_nr=$4
+	local syn_nr=$1
+	local syn_ack_nr=$2
+	local ack_nr=$3
+	local csum_ns1=${4:-0}
+	local csum_ns2=${5:-0}
+	local fail_nr=${6:-0}
+	local rst_nr=${7:-0}
+	local infi_nr=${8:-0}
+	local corrupted_pkts=${9:-0}
 	local count
-	local dump_stats
+	local with_cookie
 
-	printf "%02u %-36s %s" "$TEST_COUNT" "$msg" "syn"
-	count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinSynRx | awk '{print $2}'`
-	[ -z "$count" ] && count=0
-	if [ "$count" != "$syn_nr" ]; then
-		echo "[fail] got $count JOIN[s] syn expected $syn_nr"
-		ret=1
-		dump_stats=1
+	if [ "${corrupted_pkts}" -gt 0 ]; then
+		print_info "${corrupted_pkts} corrupted pkts"
+	fi
+
+	print_check "syn"
+	count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinSynRx")
+	if [ -z "$count" ]; then
+		print_skip
+	elif [ "$count" != "$syn_nr" ]; then
+		fail_test "got $count JOIN[s] syn expected $syn_nr"
 	else
-		echo -n "[ ok ]"
+		print_ok
 	fi
 
-	echo -n " - synack"
-	count=`ip netns exec $ns2 nstat -as | grep MPTcpExtMPJoinSynAckRx | awk '{print $2}'`
-	[ -z "$count" ] && count=0
-	if [ "$count" != "$syn_ack_nr" ]; then
-		echo "[fail] got $count JOIN[s] synack expected $syn_ack_nr"
-		ret=1
-		dump_stats=1
+	print_check "synack"
+	with_cookie=$(ip netns exec $ns2 sysctl -n net.ipv4.tcp_syncookies)
+	count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynAckRx")
+	if [ -z "$count" ]; then
+		print_skip
+	elif [ "$count" != "$syn_ack_nr" ]; then
+		# simult connections exceeding the limit with cookie enabled could go up to
+		# synack validation as the conn limit can be enforced reliably only after
+		# the subflow creation
+		if [ "$with_cookie" = 2 ] && [ "$count" -gt "$syn_ack_nr" ] && [ "$count" -le "$syn_nr" ]; then
+			print_ok
+		else
+			fail_test "got $count JOIN[s] synack expected $syn_ack_nr"
+		fi
 	else
-		echo -n "[ ok ]"
+		print_ok
 	fi
 
-	echo -n " - ack"
-	count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinAckRx | awk '{print $2}'`
-	[ -z "$count" ] && count=0
-	if [ "$count" != "$ack_nr" ]; then
-		echo "[fail] got $count JOIN[s] ack expected $ack_nr"
-		ret=1
+	print_check "ack"
+	count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckRx")
+	if [ -z "$count" ]; then
+		print_skip
+	elif [ "$count" != "$ack_nr" ]; then
+		fail_test "got $count JOIN[s] ack expected $ack_nr"
+	else
+		print_ok
+	fi
+	if $validate_checksum; then
+		chk_csum_nr $csum_ns1 $csum_ns2
+		chk_fail_nr $fail_nr $fail_nr
+		chk_rst_nr $rst_nr $rst_nr
+		chk_infi_nr $infi_nr $infi_nr
+	fi
+}
+
+# a negative value for 'stale_max' means no upper bound:
+# for bidirectional transfer, if one peer sleep for a while
+# - as these tests do - we can have a quite high number of
+# stale/recover conversions, proportional to
+# sleep duration/ MPTCP-level RTX interval.
+chk_stale_nr()
+{
+	local ns=$1
+	local stale_min=$2
+	local stale_max=$3
+	local stale_delta=$4
+	local dump_stats
+	local stale_nr
+	local recover_nr
+
+	print_check "stale"
+
+	stale_nr=$(mptcp_lib_get_counter ${ns} "MPTcpExtSubflowStale")
+	recover_nr=$(mptcp_lib_get_counter ${ns} "MPTcpExtSubflowRecover")
+	if [ -z "$stale_nr" ] || [ -z "$recover_nr" ]; then
+		print_skip
+	elif [ $stale_nr -lt $stale_min ] ||
+	   { [ $stale_max -gt 0 ] && [ $stale_nr -gt $stale_max ]; } ||
+	   [ $((stale_nr - recover_nr)) -ne $stale_delta ]; then
+		fail_test "got $stale_nr stale[s] $recover_nr recover[s], " \
+		     " expected stale in range [$stale_min..$stale_max]," \
+		     " stale-recover delta $stale_delta"
 		dump_stats=1
 	else
-		echo "[ ok ]"
+		print_ok
 	fi
+
 	if [ "${dump_stats}" = 1 ]; then
-		echo Server ns stats
-		ip netns exec $ns1 nstat -as | grep MPTcp
-		echo Client ns stats
-		ip netns exec $ns2 nstat -as | grep MPTcp
+		echo $ns stats
+		ip netns exec $ns ip -s link show
+		ip netns exec $ns nstat -as | grep MPTcp
 	fi
 }
 
@@ -330,36 +1519,136 @@ chk_add_nr()
 {
 	local add_nr=$1
 	local echo_nr=$2
+	local port_nr=${3:-0}
+	local syn_nr=${4:-$port_nr}
+	local syn_ack_nr=${5:-$port_nr}
+	local ack_nr=${6:-$port_nr}
+	local mis_syn_nr=${7:-0}
+	local mis_ack_nr=${8:-0}
 	local count
-	local dump_stats
+	local timeout
+
+	timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout)
+
+	print_check "add"
+	count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtAddAddr")
+	if [ -z "$count" ]; then
+		print_skip
+	# if the test configured a short timeout tolerate greater then expected
+	# add addrs options, due to retransmissions
+	elif [ "$count" != "$add_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_nr" ]; }; then
+		fail_test "got $count ADD_ADDR[s] expected $add_nr"
+	else
+		print_ok
+	fi
 
-	printf "%-39s %s" " " "add"
-	count=`ip netns exec $ns2 nstat -as | grep MPTcpExtAddAddr | awk '{print $2}'`
-	[ -z "$count" ] && count=0
-	if [ "$count" != "$add_nr" ]; then
-		echo "[fail] got $count ADD_ADDR[s] expected $add_nr"
-		ret=1
-		dump_stats=1
+	print_check "echo"
+	count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtEchoAdd")
+	if [ -z "$count" ]; then
+		print_skip
+	elif [ "$count" != "$echo_nr" ]; then
+		fail_test "got $count ADD_ADDR echo[s] expected $echo_nr"
 	else
-		echo -n "[ ok ]"
+		print_ok
 	fi
 
-	echo -n " - echo  "
-	count=`ip netns exec $ns1 nstat -as | grep MPTcpExtEchoAdd | awk '{print $2}'`
-	[ -z "$count" ] && count=0
-	if [ "$count" != "$echo_nr" ]; then
-		echo "[fail] got $count ADD_ADDR echo[s] expected $echo_nr"
-		ret=1
-		dump_stats=1
+	if [ $port_nr -gt 0 ]; then
+		print_check "pt"
+		count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtPortAdd")
+		if [ -z "$count" ]; then
+			print_skip
+		elif [ "$count" != "$port_nr" ]; then
+			fail_test "got $count ADD_ADDR[s] with a port-number expected $port_nr"
+		else
+			print_ok
+		fi
+
+		print_check "syn"
+		count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinPortSynRx")
+		if [ -z "$count" ]; then
+			print_skip
+		elif [ "$count" != "$syn_nr" ]; then
+			fail_test "got $count JOIN[s] syn with a different \
+				   port-number expected $syn_nr"
+		else
+			print_ok
+		fi
+
+		print_check "synack"
+		count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinPortSynAckRx")
+		if [ -z "$count" ]; then
+			print_skip
+		elif [ "$count" != "$syn_ack_nr" ]; then
+			fail_test "got $count JOIN[s] synack with a different \
+				   port-number expected $syn_ack_nr"
+		else
+			print_ok
+		fi
+
+		print_check "ack"
+		count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinPortAckRx")
+		if [ -z "$count" ]; then
+			print_skip
+		elif [ "$count" != "$ack_nr" ]; then
+			fail_test "got $count JOIN[s] ack with a different \
+				   port-number expected $ack_nr"
+		else
+			print_ok
+		fi
+
+		print_check "syn"
+		count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMismatchPortSynRx")
+		if [ -z "$count" ]; then
+			print_skip
+		elif [ "$count" != "$mis_syn_nr" ]; then
+			fail_test "got $count JOIN[s] syn with a mismatched \
+				   port-number expected $mis_syn_nr"
+		else
+			print_ok
+		fi
+
+		print_check "ack"
+		count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMismatchPortAckRx")
+		if [ -z "$count" ]; then
+			print_skip
+		elif [ "$count" != "$mis_ack_nr" ]; then
+			fail_test "got $count JOIN[s] ack with a mismatched \
+				   port-number expected $mis_ack_nr"
+		else
+			print_ok
+		fi
+	fi
+}
+
+chk_add_tx_nr()
+{
+	local add_tx_nr=$1
+	local echo_tx_nr=$2
+	local timeout
+	local count
+
+	timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout)
+
+	print_check "add TX"
+	count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtAddAddrTx")
+	if [ -z "$count" ]; then
+		print_skip
+	# if the test configured a short timeout tolerate greater then expected
+	# add addrs options, due to retransmissions
+	elif [ "$count" != "$add_tx_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_tx_nr" ]; }; then
+		fail_test "got $count ADD_ADDR[s] TX, expected $add_tx_nr"
 	else
-		echo "[ ok ]"
+		print_ok
 	fi
 
-	if [ "${dump_stats}" = 1 ]; then
-		echo Server ns stats
-		ip netns exec $ns1 nstat -as | grep MPTcp
-		echo Client ns stats
-		ip netns exec $ns2 nstat -as | grep MPTcp
+	print_check "echo TX"
+	count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtEchoAddTx")
+	if [ -z "$count" ]; then
+		print_skip
+	elif [ "$count" != "$echo_tx_nr" ]; then
+		fail_test "got $count ADD_ADDR echo[s] TX, expected $echo_tx_nr"
+	else
+		print_ok
 	fi
 }
 
@@ -367,236 +1656,2089 @@ chk_rm_nr()
 {
 	local rm_addr_nr=$1
 	local rm_subflow_nr=$2
+	local invert
+	local simult
+	local count
+	local addr_ns=$ns1
+	local subflow_ns=$ns2
+	local extra_msg=""
+
+	shift 2
+	while [ -n "$1" ]; do
+		[ "$1" = "invert" ] && invert=true
+		[ "$1" = "simult" ] && simult=true
+		shift
+	done
+
+	if [ -z $invert ]; then
+		addr_ns=$ns1
+		subflow_ns=$ns2
+	elif [ $invert = "true" ]; then
+		addr_ns=$ns2
+		subflow_ns=$ns1
+		extra_msg="invert"
+	fi
+
+	print_check "rm"
+	count=$(mptcp_lib_get_counter ${addr_ns} "MPTcpExtRmAddr")
+	if [ -z "$count" ]; then
+		print_skip
+	elif [ "$count" != "$rm_addr_nr" ]; then
+		fail_test "got $count RM_ADDR[s] expected $rm_addr_nr"
+	else
+		print_ok
+	fi
+
+	print_check "rmsf"
+	count=$(mptcp_lib_get_counter ${subflow_ns} "MPTcpExtRmSubflow")
+	if [ -z "$count" ]; then
+		print_skip
+	elif [ -n "$simult" ]; then
+		local cnt suffix
+
+		cnt=$(mptcp_lib_get_counter ${addr_ns} "MPTcpExtRmSubflow")
+
+		# in case of simult flush, the subflow removal count on each side is
+		# unreliable
+		count=$((count + cnt))
+		if [ "$count" != "$rm_subflow_nr" ]; then
+			suffix="$count in [$rm_subflow_nr:$((rm_subflow_nr*2))]"
+			extra_msg+=" simult"
+		fi
+		if [ $count -ge "$rm_subflow_nr" ] && \
+		   [ "$count" -le "$((rm_subflow_nr *2 ))" ]; then
+			print_ok "$suffix"
+		else
+			fail_test "got $count RM_SUBFLOW[s] expected in range [$rm_subflow_nr:$((rm_subflow_nr*2))]"
+		fi
+	elif [ "$count" != "$rm_subflow_nr" ]; then
+		fail_test "got $count RM_SUBFLOW[s] expected $rm_subflow_nr"
+	else
+		print_ok
+	fi
+
+	print_info "$extra_msg"
+}
+
+chk_rm_tx_nr()
+{
+	local rm_addr_tx_nr=$1
+
+	print_check "rm TX"
+	count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtRmAddrTx")
+	if [ -z "$count" ]; then
+		print_skip
+	elif [ "$count" != "$rm_addr_tx_nr" ]; then
+		fail_test "got $count RM_ADDR[s] expected $rm_addr_tx_nr"
+	else
+		print_ok
+	fi
+}
+
+chk_prio_nr()
+{
+	local mp_prio_nr_tx=$1
+	local mp_prio_nr_rx=$2
 	local count
+
+	print_check "ptx"
+	count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPPrioTx")
+	if [ -z "$count" ]; then
+		print_skip
+	elif [ "$count" != "$mp_prio_nr_tx" ]; then
+		fail_test "got $count MP_PRIO[s] TX expected $mp_prio_nr_tx"
+	else
+		print_ok
+	fi
+
+	print_check "prx"
+	count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPPrioRx")
+	if [ -z "$count" ]; then
+		print_skip
+	elif [ "$count" != "$mp_prio_nr_rx" ]; then
+		fail_test "got $count MP_PRIO[s] RX expected $mp_prio_nr_rx"
+	else
+		print_ok
+	fi
+}
+
+chk_subflow_nr()
+{
+	local msg="$1"
+	local subflow_nr=$2
+	local cnt1
+	local cnt2
 	local dump_stats
 
-	printf "%-39s %s" " " "rm "
-	count=`ip netns exec $ns1 nstat -as | grep MPTcpExtRmAddr | awk '{print $2}'`
-	[ -z "$count" ] && count=0
-	if [ "$count" != "$rm_addr_nr" ]; then
-		echo "[fail] got $count RM_ADDR[s] expected $rm_addr_nr"
-		ret=1
+	print_check "${msg}"
+
+	cnt1=$(ss -N $ns1 -tOni | grep -c token)
+	cnt2=$(ss -N $ns2 -tOni | grep -c token)
+	if [ "$cnt1" != "$subflow_nr" ] || [ "$cnt2" != "$subflow_nr" ]; then
+		fail_test "got $cnt1:$cnt2 subflows expected $subflow_nr"
 		dump_stats=1
 	else
-		echo -n "[ ok ]"
+		print_ok
 	fi
 
-	echo -n " - sf    "
-	count=`ip netns exec $ns2 nstat -as | grep MPTcpExtRmSubflow | awk '{print $2}'`
-	[ -z "$count" ] && count=0
-	if [ "$count" != "$rm_subflow_nr" ]; then
-		echo "[fail] got $count RM_SUBFLOW[s] expected $rm_subflow_nr"
-		ret=1
+	if [ "${dump_stats}" = 1 ]; then
+		ss -N $ns1 -tOni
+		ss -N $ns1 -tOni | grep token
+		ip -n $ns1 mptcp endpoint
+	fi
+}
+
+chk_mptcp_info()
+{
+	local info1=$1
+	local exp1=$2
+	local info2=$3
+	local exp2=$4
+	local cnt1
+	local cnt2
+	local dump_stats
+
+	print_check "mptcp_info ${info1:0:15}=$exp1:$exp2"
+
+	cnt1=$(ss -N $ns1 -inmHM | mptcp_lib_get_info_value "$info1" "$info1")
+	cnt2=$(ss -N $ns2 -inmHM | mptcp_lib_get_info_value "$info2" "$info2")
+	# 'ss' only display active connections and counters that are not 0.
+	[ -z "$cnt1" ] && cnt1=0
+	[ -z "$cnt2" ] && cnt2=0
+
+	if [ "$cnt1" != "$exp1" ] || [ "$cnt2" != "$exp2" ]; then
+		fail_test "got $cnt1:$cnt2 $info1:$info2 expected $exp1:$exp2"
 		dump_stats=1
 	else
-		echo "[ ok ]"
+		print_ok
 	fi
 
-	if [ "${dump_stats}" = 1 ]; then
-		echo Server ns stats
-		ip netns exec $ns1 nstat -as | grep MPTcp
-		echo Client ns stats
-		ip netns exec $ns2 nstat -as | grep MPTcp
-	fi
-}
-
-sin=$(mktemp)
-sout=$(mktemp)
-cin=$(mktemp)
-cout=$(mktemp)
-init
-make_file "$cin" "client"
-make_file "$sin" "server"
-trap cleanup EXIT
-
-run_tests $ns1 $ns2 10.0.1.1
-chk_join_nr "no JOIN" "0" "0" "0"
-
-# subflow limted by client
-reset
-ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-run_tests $ns1 $ns2 10.0.1.1
-chk_join_nr "single subflow, limited by client" 0 0 0
-
-# subflow limted by server
-reset
-ip netns exec $ns2 ./pm_nl_ctl limits 0 1
-ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-run_tests $ns1 $ns2 10.0.1.1
-chk_join_nr "single subflow, limited by server" 1 1 0
-
-# subflow
-reset
-ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-ip netns exec $ns2 ./pm_nl_ctl limits 0 1
-ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-run_tests $ns1 $ns2 10.0.1.1
-chk_join_nr "single subflow" 1 1 1
-
-# multiple subflows
-reset
-ip netns exec $ns1 ./pm_nl_ctl limits 0 2
-ip netns exec $ns2 ./pm_nl_ctl limits 0 2
-ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
-run_tests $ns1 $ns2 10.0.1.1
-chk_join_nr "multiple subflows" 2 2 2
-
-# multiple subflows limited by serverf
-reset
-ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-ip netns exec $ns2 ./pm_nl_ctl limits 0 2
-ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
-run_tests $ns1 $ns2 10.0.1.1
-chk_join_nr "multiple subflows, limited by server" 2 2 1
-
-# add_address, unused
-reset
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
-run_tests $ns1 $ns2 10.0.1.1
-chk_join_nr "unused signal address" 0 0 0
-chk_add_nr 1 1
-
-# accept and use add_addr
-reset
-ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-ip netns exec $ns2 ./pm_nl_ctl limits 1 1
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
-run_tests $ns1 $ns2 10.0.1.1
-chk_join_nr "signal address" 1 1 1
-chk_add_nr 1 1
-
-# accept and use add_addr with an additional subflow
-# note: signal address in server ns and local addresses in client ns must
-# belong to different subnets or one of the listed local address could be
-# used for 'add_addr' subflow
-reset
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
-ip netns exec $ns1 ./pm_nl_ctl limits 0 2
-ip netns exec $ns2 ./pm_nl_ctl limits 1 2
-ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-run_tests $ns1 $ns2 10.0.1.1
-chk_join_nr "subflow and signal" 2 2 2
-chk_add_nr 1 1
-
-# accept and use add_addr with additional subflows
-reset
-ip netns exec $ns1 ./pm_nl_ctl limits 0 3
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
-ip netns exec $ns2 ./pm_nl_ctl limits 1 3
-ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
-run_tests $ns1 $ns2 10.0.1.1
-chk_join_nr "multiple subflows and signal" 3 3 3
-chk_add_nr 1 1
-
-# single subflow, remove
-reset
-ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-ip netns exec $ns2 ./pm_nl_ctl limits 0 1
-ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-run_remove_tests $ns1 $ns2 10.0.1.1 0 1
-chk_join_nr "remove single subflow" 1 1 1
-chk_rm_nr 1 1
-
-# multiple subflows, remove
-reset
-ip netns exec $ns1 ./pm_nl_ctl limits 0 2
-ip netns exec $ns2 ./pm_nl_ctl limits 0 2
-ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
-ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-run_remove_tests $ns1 $ns2 10.0.1.1 0 2
-chk_join_nr "remove multiple subflows" 2 2 2
-chk_rm_nr 2 2
-
-# single address, remove
-reset
-ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
-ip netns exec $ns2 ./pm_nl_ctl limits 1 1
-run_remove_tests $ns1 $ns2 10.0.1.1 1 0
-chk_join_nr "remove single address" 1 1 1
-chk_add_nr 1 1
-chk_rm_nr 0 0
-
-# subflow and signal, remove
-reset
-ip netns exec $ns1 ./pm_nl_ctl limits 0 2
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
-ip netns exec $ns2 ./pm_nl_ctl limits 1 2
-ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-run_remove_tests $ns1 $ns2 10.0.1.1 1 1
-chk_join_nr "remove subflow and signal" 2 2 2
-chk_add_nr 1 1
-chk_rm_nr 1 1
-
-# subflows and signal, remove
-reset
-ip netns exec $ns1 ./pm_nl_ctl limits 0 3
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
-ip netns exec $ns2 ./pm_nl_ctl limits 1 3
-ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
-run_remove_tests $ns1 $ns2 10.0.1.1 1 2
-chk_join_nr "remove subflows and signal" 3 3 3
-chk_add_nr 1 1
-chk_rm_nr 2 2
-
-# single subflow, syncookies
-reset_with_cookies
-ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-ip netns exec $ns2 ./pm_nl_ctl limits 0 1
-ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-run_tests $ns1 $ns2 10.0.1.1
-chk_join_nr "single subflow with syn cookies" 1 1 1
-
-# multiple subflows with syn cookies
-reset_with_cookies
-ip netns exec $ns1 ./pm_nl_ctl limits 0 2
-ip netns exec $ns2 ./pm_nl_ctl limits 0 2
-ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
-run_tests $ns1 $ns2 10.0.1.1
-chk_join_nr "multiple subflows with syn cookies" 2 2 2
-
-# multiple subflows limited by server
-reset_with_cookies
-ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-ip netns exec $ns2 ./pm_nl_ctl limits 0 2
-ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
-run_tests $ns1 $ns2 10.0.1.1
-chk_join_nr "subflows limited by server w cookies" 2 2 1
-
-# test signal address with cookies
-reset_with_cookies
-ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-ip netns exec $ns2 ./pm_nl_ctl limits 1 1
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
-run_tests $ns1 $ns2 10.0.1.1
-chk_join_nr "signal address with syn cookies" 1 1 1
-chk_add_nr 1 1
-
-# test cookie with subflow and signal
-reset_with_cookies
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
-ip netns exec $ns1 ./pm_nl_ctl limits 0 2
-ip netns exec $ns2 ./pm_nl_ctl limits 1 2
-ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-run_tests $ns1 $ns2 10.0.1.1
-chk_join_nr "subflow and signal w cookies" 2 2 2
-chk_add_nr 1 1
-
-# accept and use add_addr with additional subflows
-reset_with_cookies
-ip netns exec $ns1 ./pm_nl_ctl limits 0 3
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
-ip netns exec $ns2 ./pm_nl_ctl limits 1 3
-ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
-run_tests $ns1 $ns2 10.0.1.1
-chk_join_nr "subflows and signal w. cookies" 3 3 3
-chk_add_nr 1 1
+	if [ "$dump_stats" = 1 ]; then
+		ss -N $ns1 -inmHM
+		ss -N $ns2 -inmHM
+	fi
+}
+
+# $1: subflows in ns1 ; $2: subflows in ns2
+# number of all subflows, including the initial subflow.
+chk_subflows_total()
+{
+	local cnt1
+	local cnt2
+	local info="subflows_total"
+	local dump_stats
+
+	# if subflows_total counter is supported, use it:
+	if [ -n "$(ss -N $ns1 -inmHM | mptcp_lib_get_info_value $info $info)" ]; then
+		chk_mptcp_info $info $1 $info $2
+		return
+	fi
+
+	print_check "$info $1:$2"
+
+	# if not, count the TCP connections that are in fact MPTCP subflows
+	cnt1=$(ss -N $ns1 -ti state established state syn-sent state syn-recv |
+	       grep -c tcp-ulp-mptcp)
+	cnt2=$(ss -N $ns2 -ti state established state syn-sent state syn-recv |
+	       grep -c tcp-ulp-mptcp)
+
+	if [ "$1" != "$cnt1" ] || [ "$2" != "$cnt2" ]; then
+		fail_test "got subflows $cnt1:$cnt2 expected $1:$2"
+		dump_stats=1
+	else
+		print_ok
+	fi
+
+	if [ "$dump_stats" = 1 ]; then
+		ss -N $ns1 -ti
+		ss -N $ns2 -ti
+	fi
+}
+
+chk_link_usage()
+{
+	local ns=$1
+	local link=$2
+	local out=$3
+	local expected_rate=$4
+
+	local tx_link tx_total
+	tx_link=$(ip netns exec $ns cat /sys/class/net/$link/statistics/tx_bytes)
+	tx_total=$(stat --format=%s $out)
+	local tx_rate=$((tx_link * 100 / tx_total))
+	local tolerance=5
+
+	print_check "link usage"
+	if [ $tx_rate -lt $((expected_rate - tolerance)) ] || \
+	   [ $tx_rate -gt $((expected_rate + tolerance)) ]; then
+		fail_test "got $tx_rate% usage, expected $expected_rate%"
+	else
+		print_ok
+	fi
+}
+
+wait_attempt_fail()
+{
+	local timeout_ms=$((timeout_poll * 1000))
+	local time=0
+	local ns=$1
+
+	while [ $time -lt $timeout_ms ]; do
+		local cnt
+
+		cnt=$(mptcp_lib_get_counter ${ns} "TcpAttemptFails")
+
+		[ "$cnt" = 1 ] && return 1
+		time=$((time + 100))
+		sleep 0.1
+	done
+	return 1
+}
+
+set_userspace_pm()
+{
+	local ns=$1
+
+	ip netns exec $ns sysctl -q net.mptcp.pm_type=1
+}
+
+subflows_tests()
+{
+	if reset "no JOIN"; then
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 0 0 0
+	fi
+
+	# subflow limited by client
+	if reset "single subflow, limited by client"; then
+		pm_nl_set_limits $ns1 0 0
+		pm_nl_set_limits $ns2 0 0
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 0 0 0
+	fi
+
+	# subflow limited by server
+	if reset "single subflow, limited by server"; then
+		pm_nl_set_limits $ns1 0 0
+		pm_nl_set_limits $ns2 0 1
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 1 1 0
+	fi
+
+	# subflow
+	if reset "single subflow"; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 1
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 1 1 1
+	fi
+
+	# multiple subflows
+	if reset "multiple subflows"; then
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_set_limits $ns2 0 2
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 2 2 2
+	fi
+
+	# multiple subflows limited by server
+	if reset "multiple subflows, limited by server"; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 2
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 2 2 1
+	fi
+
+	# single subflow, dev
+	if reset "single subflow, dev"; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 1
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow dev ns2eth3
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 1 1 1
+	fi
+}
+
+subflows_error_tests()
+{
+	# If a single subflow is configured, and matches the MPC src
+	# address, no additional subflow should be created
+	if reset "no MPC reuse with single endpoint"; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 1
+		pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow
+		speed=slow \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 0 0 0
+	fi
+
+	# multiple subflows, with subflow creation error
+	if reset_with_tcp_filter "multi subflows, with failing subflow" ns1 10.0.3.2 REJECT &&
+	   continue_if mptcp_lib_kallsyms_has "mptcp_pm_subflow_check_next$"; then
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_set_limits $ns2 0 2
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
+		speed=slow \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 1 1 1
+	fi
+
+	# multiple subflows, with subflow timeout on MPJ
+	if reset_with_tcp_filter "multi subflows, with subflow timeout" ns1 10.0.3.2 DROP &&
+	   continue_if mptcp_lib_kallsyms_has "mptcp_pm_subflow_check_next$"; then
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_set_limits $ns2 0 2
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
+		speed=slow \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 1 1 1
+	fi
+
+	# multiple subflows, check that the endpoint corresponding to
+	# closed subflow (due to reset) is not reused if additional
+	# subflows are added later
+	if reset_with_tcp_filter "multi subflows, fair usage on close" ns1 10.0.3.2 REJECT &&
+	   continue_if mptcp_lib_kallsyms_has "mptcp_pm_subflow_check_next$"; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 1
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		speed=slow \
+			run_tests $ns1 $ns2 10.0.1.1 &
+
+		# mpj subflow will be in TW after the reset
+		wait_attempt_fail $ns2
+		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
+		wait
+
+		# additional subflow could be created only if the PM select
+		# the later endpoint, skipping the already used one
+		chk_join_nr 1 1 1
+	fi
+}
+
+signal_address_tests()
+{
+	# add_address, unused
+	if reset "unused signal address"; then
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 0 0 0
+		chk_add_tx_nr 1 1
+		chk_add_nr 1 1
+	fi
+
+	# accept and use add_addr
+	if reset "signal address"; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 1 1
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 1 1 1
+		chk_add_nr 1 1
+	fi
+
+	# accept and use add_addr with an additional subflow
+	# note: signal address in server ns and local addresses in client ns must
+	# belong to different subnets or one of the listed local address could be
+	# used for 'add_addr' subflow
+	if reset "subflow and signal"; then
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_set_limits $ns2 1 2
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 2 2 2
+		chk_add_nr 1 1
+	fi
+
+	# accept and use add_addr with additional subflows
+	if reset "multiple subflows and signal"; then
+		pm_nl_set_limits $ns1 0 3
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		pm_nl_set_limits $ns2 1 3
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 3 3 3
+		chk_add_nr 1 1
+	fi
+
+	# signal addresses
+	if reset "signal addresses"; then
+		pm_nl_set_limits $ns1 3 3
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.4.1 flags signal
+		pm_nl_set_limits $ns2 3 3
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 3 3 3
+		chk_add_nr 3 3
+	fi
+
+	# signal invalid addresses
+	if reset "signal invalid addresses"; then
+		pm_nl_set_limits $ns1 3 3
+		pm_nl_add_endpoint $ns1 10.0.12.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.14.1 flags signal
+		pm_nl_set_limits $ns2 3 3
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 1 1 1
+		chk_add_nr 3 3
+	fi
+
+	# signal addresses race test
+	if reset "signal addresses race test"; then
+		pm_nl_set_limits $ns1 4 4
+		pm_nl_set_limits $ns2 4 4
+		pm_nl_add_endpoint $ns1 10.0.1.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.4.1 flags signal
+		pm_nl_add_endpoint $ns2 10.0.1.2 flags signal
+		pm_nl_add_endpoint $ns2 10.0.2.2 flags signal
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags signal
+		pm_nl_add_endpoint $ns2 10.0.4.2 flags signal
+
+		# the peer could possibly miss some addr notification, allow retransmission
+		ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1
+		speed=slow \
+			run_tests $ns1 $ns2 10.0.1.1
+
+		# It is not directly linked to the commit introducing this
+		# symbol but for the parent one which is linked anyway.
+		if ! mptcp_lib_kallsyms_has "mptcp_pm_subflow_check_next$"; then
+			chk_join_nr 3 3 2
+			chk_add_nr 4 4
+		else
+			chk_join_nr 3 3 3
+			# the server will not signal the address terminating
+			# the MPC subflow
+			chk_add_nr 3 3
+		fi
+	fi
+}
+
+link_failure_tests()
+{
+	# accept and use add_addr with additional subflows and link loss
+	if reset "multiple flows, signal, link failure"; then
+		# without any b/w limit each veth could spool the packets and get
+		# them acked at xmit time, so that the corresponding subflow will
+		# have almost always no outstanding pkts, the scheduler will pick
+		# always the first subflow and we will have hard time testing
+		# active backup and link switch-over.
+		# Let's set some arbitrary (low) virtual link limits.
+		init_shapers
+		pm_nl_set_limits $ns1 0 3
+		pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal
+		pm_nl_set_limits $ns2 1 3
+		pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow
+		pm_nl_add_endpoint $ns2 10.0.4.2 dev ns2eth4 flags subflow
+		test_linkfail=1 \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 3 3 3
+		chk_add_nr 1 1
+		chk_stale_nr $ns2 1 5 1
+	fi
+
+	# accept and use add_addr with additional subflows and link loss
+	# for bidirectional transfer
+	if reset "multi flows, signal, bidi, link fail"; then
+		init_shapers
+		pm_nl_set_limits $ns1 0 3
+		pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal
+		pm_nl_set_limits $ns2 1 3
+		pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow
+		pm_nl_add_endpoint $ns2 10.0.4.2 dev ns2eth4 flags subflow
+		test_linkfail=2 \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 3 3 3
+		chk_add_nr 1 1
+		chk_stale_nr $ns2 1 -1 1
+	fi
+
+	# 2 subflows plus 1 backup subflow with a lossy link, backup
+	# will never be used
+	if reset "backup subflow unused, link failure"; then
+		init_shapers
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal
+		pm_nl_set_limits $ns2 1 2
+		pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup
+		FAILING_LINKS="1" test_linkfail=1 \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 2 2 2
+		chk_add_nr 1 1
+		chk_link_usage $ns2 ns2eth3 $cinsent 0
+	fi
+
+	# 2 lossy links after half transfer, backup will get half of
+	# the traffic
+	if reset "backup flow used, multi links fail"; then
+		init_shapers
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal
+		pm_nl_set_limits $ns2 1 2
+		pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup
+		FAILING_LINKS="1 2" test_linkfail=1 \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 2 2 2
+		chk_add_nr 1 1
+		chk_stale_nr $ns2 2 4 2
+		chk_link_usage $ns2 ns2eth3 $cinsent 50
+	fi
+
+	# use a backup subflow with the first subflow on a lossy link
+	# for bidirectional transfer
+	if reset "backup flow used, bidi, link failure"; then
+		init_shapers
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal
+		pm_nl_set_limits $ns2 1 3
+		pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup
+		FAILING_LINKS="1 2" test_linkfail=2 \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 2 2 2
+		chk_add_nr 1 1
+		chk_stale_nr $ns2 1 -1 2
+		chk_link_usage $ns2 ns2eth3 $cinsent 50
+	fi
+}
+
+add_addr_timeout_tests()
+{
+	# add_addr timeout
+	if reset_with_add_addr_timeout "signal address, ADD_ADDR timeout"; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 1 1
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		speed=slow \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 1 1 1
+		chk_add_tx_nr 4 4
+		chk_add_nr 4 0
+	fi
+
+	# add_addr timeout IPv6
+	if reset_with_add_addr_timeout "signal address, ADD_ADDR6 timeout" 6; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 1 1
+		pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
+		speed=slow \
+			run_tests $ns1 $ns2 dead:beef:1::1
+		chk_join_nr 1 1 1
+		chk_add_nr 4 0
+	fi
+
+	# signal addresses timeout
+	if reset_with_add_addr_timeout "signal addresses, ADD_ADDR timeout"; then
+		pm_nl_set_limits $ns1 2 2
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+		pm_nl_set_limits $ns2 2 2
+		speed=10 \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 2 2 2
+		chk_add_nr 8 0
+	fi
+
+	# signal invalid addresses timeout
+	if reset_with_add_addr_timeout "invalid address, ADD_ADDR timeout"; then
+		pm_nl_set_limits $ns1 2 2
+		pm_nl_add_endpoint $ns1 10.0.12.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+		pm_nl_set_limits $ns2 2 2
+		speed=10 \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 1 1 1
+		chk_add_nr 8 0
+	fi
+}
+
+remove_tests()
+{
+	# single subflow, remove
+	if reset "remove single subflow"; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 1
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		addr_nr_ns2=-1 speed=slow \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 1 1 1
+		chk_rm_tx_nr 1
+		chk_rm_nr 1 1
+		chk_rst_nr 0 0
+	fi
+
+	# multiple subflows, remove
+	if reset "remove multiple subflows"; then
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_set_limits $ns2 0 2
+		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		addr_nr_ns2=-2 speed=slow \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 2 2 2
+		chk_rm_nr 2 2
+		chk_rst_nr 0 0
+	fi
+
+	# single address, remove
+	if reset "remove single address"; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		pm_nl_set_limits $ns2 1 1
+		addr_nr_ns1=-1 speed=slow \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 1 1 1
+		chk_add_nr 1 1
+		chk_rm_nr 1 1 invert
+		chk_rst_nr 0 0
+	fi
+
+	# subflow and signal, remove
+	if reset "remove subflow and signal"; then
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		pm_nl_set_limits $ns2 1 2
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		addr_nr_ns1=-1 addr_nr_ns2=-1 speed=slow \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 2 2 2
+		chk_add_nr 1 1
+		chk_rm_nr 1 1
+		chk_rst_nr 0 0
+	fi
+
+	# subflows and signal, remove
+	if reset "remove subflows and signal"; then
+		pm_nl_set_limits $ns1 0 3
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		pm_nl_set_limits $ns2 1 3
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
+		addr_nr_ns1=-1 addr_nr_ns2=-2 speed=10 \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 3 3 3
+		chk_add_nr 1 1
+		chk_rm_nr 2 2
+		chk_rst_nr 0 0
+	fi
+
+	# addresses remove
+	if reset "remove addresses"; then
+		pm_nl_set_limits $ns1 3 3
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal id 250
+		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.4.1 flags signal
+		pm_nl_set_limits $ns2 3 3
+		addr_nr_ns1=-3 speed=10 \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 3 3 3
+		chk_add_nr 3 3
+		chk_rm_nr 3 3 invert
+		chk_rst_nr 0 0
+	fi
+
+	# invalid addresses remove
+	if reset "remove invalid addresses"; then
+		pm_nl_set_limits $ns1 3 3
+		pm_nl_add_endpoint $ns1 10.0.12.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.14.1 flags signal
+		pm_nl_set_limits $ns2 3 3
+		addr_nr_ns1=-3 speed=10 \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 1 1 1
+		chk_add_nr 3 3
+		chk_rm_nr 3 1 invert
+		chk_rst_nr 0 0
+	fi
+
+	# subflows and signal, flush
+	if reset "flush subflows and signal"; then
+		pm_nl_set_limits $ns1 0 3
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		pm_nl_set_limits $ns2 1 3
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
+		addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 3 3 3
+		chk_add_nr 1 1
+		chk_rm_nr 1 3 invert simult
+		chk_rst_nr 0 0
+	fi
+
+	# subflows flush
+	if reset "flush subflows"; then
+		pm_nl_set_limits $ns1 3 3
+		pm_nl_set_limits $ns2 3 3
+		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow id 150
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
+		addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 3 3 3
+
+		if mptcp_lib_kversion_ge 5.18; then
+			chk_rm_tx_nr 0
+			chk_rm_nr 0 3 simult
+		else
+			chk_rm_nr 3 3
+		fi
+		chk_rst_nr 0 0
+	fi
+
+	# addresses flush
+	if reset "flush addresses"; then
+		pm_nl_set_limits $ns1 3 3
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal id 250
+		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.4.1 flags signal
+		pm_nl_set_limits $ns2 3 3
+		addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 3 3 3
+		chk_add_nr 3 3
+		chk_rm_nr 3 3 invert simult
+		chk_rst_nr 0 0
+	fi
+
+	# invalid addresses flush
+	if reset "flush invalid addresses"; then
+		pm_nl_set_limits $ns1 3 3
+		pm_nl_add_endpoint $ns1 10.0.12.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.14.1 flags signal
+		pm_nl_set_limits $ns2 3 3
+		addr_nr_ns1=-8 speed=slow \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 1 1 1
+		chk_add_nr 3 3
+		chk_rm_nr 3 1 invert
+		chk_rst_nr 0 0
+	fi
+
+	# remove id 0 subflow
+	if reset "remove id 0 subflow"; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 1
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		addr_nr_ns2=-9 speed=slow \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 1 1 1
+		chk_rm_nr 1 1
+		chk_rst_nr 0 0
+	fi
+
+	# remove id 0 address
+	if reset "remove id 0 address"; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		pm_nl_set_limits $ns2 1 1
+		addr_nr_ns1=-9 speed=slow \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 1 1 1
+		chk_add_nr 1 1
+		chk_rm_nr 1 1 invert
+		chk_rst_nr 0 0 invert
+	fi
+}
+
+add_tests()
+{
+	# add single subflow
+	if reset "add single subflow"; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 1
+		addr_nr_ns2=1 speed=slow cestab_ns2=1 \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 1 1 1
+		chk_cestab_nr $ns2 0
+	fi
+
+	# add signal address
+	if reset "add signal address"; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 1 1
+		addr_nr_ns1=1 speed=slow cestab_ns1=1 \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 1 1 1
+		chk_add_nr 1 1
+		chk_cestab_nr $ns1 0
+	fi
+
+	# add multiple subflows
+	if reset "add multiple subflows"; then
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_set_limits $ns2 0 2
+		addr_nr_ns2=2 speed=slow cestab_ns2=1 \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 2 2 2
+		chk_cestab_nr $ns2 0
+	fi
+
+	# add multiple subflows IPv6
+	if reset "add multiple subflows IPv6"; then
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_set_limits $ns2 0 2
+		addr_nr_ns2=2 speed=slow cestab_ns2=1 \
+			run_tests $ns1 $ns2 dead:beef:1::1
+		chk_join_nr 2 2 2
+		chk_cestab_nr $ns2 0
+	fi
+
+	# add multiple addresses IPv6
+	if reset "add multiple addresses IPv6"; then
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_set_limits $ns2 2 2
+		addr_nr_ns1=2 speed=slow cestab_ns1=1 \
+			run_tests $ns1 $ns2 dead:beef:1::1
+		chk_join_nr 2 2 2
+		chk_add_nr 2 2
+		chk_cestab_nr $ns1 0
+	fi
+}
+
+ipv6_tests()
+{
+	# subflow IPv6
+	if reset "single subflow IPv6"; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 1
+		pm_nl_add_endpoint $ns2 dead:beef:3::2 dev ns2eth3 flags subflow
+		speed=slow \
+			run_tests $ns1 $ns2 dead:beef:1::1
+		chk_join_nr 1 1 1
+	fi
+
+	# add_address, unused IPv6
+	if reset "unused signal address IPv6"; then
+		pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
+		speed=slow \
+			run_tests $ns1 $ns2 dead:beef:1::1
+		chk_join_nr 0 0 0
+		chk_add_nr 1 1
+	fi
+
+	# signal address IPv6
+	if reset "single address IPv6"; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
+		pm_nl_set_limits $ns2 1 1
+		speed=slow \
+			run_tests $ns1 $ns2 dead:beef:1::1
+		chk_join_nr 1 1 1
+		chk_add_nr 1 1
+	fi
+
+	# single address IPv6, remove
+	if reset "remove single address IPv6"; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
+		pm_nl_set_limits $ns2 1 1
+		addr_nr_ns1=-1 speed=slow \
+			run_tests $ns1 $ns2 dead:beef:1::1
+		chk_join_nr 1 1 1
+		chk_add_nr 1 1
+		chk_rm_nr 1 1 invert
+	fi
+
+	# subflow and signal IPv6, remove
+	if reset "remove subflow and signal IPv6"; then
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
+		pm_nl_set_limits $ns2 1 2
+		pm_nl_add_endpoint $ns2 dead:beef:3::2 dev ns2eth3 flags subflow
+		addr_nr_ns1=-1 addr_nr_ns2=-1 speed=slow \
+			run_tests $ns1 $ns2 dead:beef:1::1
+		chk_join_nr 2 2 2
+		chk_add_nr 1 1
+		chk_rm_nr 1 1
+	fi
+}
+
+v4mapped_tests()
+{
+	# subflow IPv4-mapped to IPv4-mapped
+	if reset "single subflow IPv4-mapped"; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 1
+		pm_nl_add_endpoint $ns2 "::ffff:10.0.3.2" flags subflow
+		run_tests $ns1 $ns2 "::ffff:10.0.1.1"
+		chk_join_nr 1 1 1
+	fi
+
+	# signal address IPv4-mapped with IPv4-mapped sk
+	if reset "signal address IPv4-mapped"; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 1 1
+		pm_nl_add_endpoint $ns1 "::ffff:10.0.2.1" flags signal
+		run_tests $ns1 $ns2 "::ffff:10.0.1.1"
+		chk_join_nr 1 1 1
+		chk_add_nr 1 1
+	fi
+
+	# subflow v4-map-v6
+	if reset "single subflow v4-map-v6"; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 1
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		run_tests $ns1 $ns2 "::ffff:10.0.1.1"
+		chk_join_nr 1 1 1
+	fi
+
+	# signal address v4-map-v6
+	if reset "signal address v4-map-v6"; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 1 1
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		run_tests $ns1 $ns2 "::ffff:10.0.1.1"
+		chk_join_nr 1 1 1
+		chk_add_nr 1 1
+	fi
+
+	# subflow v6-map-v4
+	if reset "single subflow v6-map-v4"; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 1
+		pm_nl_add_endpoint $ns2 "::ffff:10.0.3.2" flags subflow
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 1 1 1
+	fi
+
+	# signal address v6-map-v4
+	if reset "signal address v6-map-v4"; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 1 1
+		pm_nl_add_endpoint $ns1 "::ffff:10.0.2.1" flags signal
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 1 1 1
+		chk_add_nr 1 1
+	fi
+
+	# no subflow IPv6 to v4 address
+	if reset "no JOIN with diff families v4-v6"; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 1
+		pm_nl_add_endpoint $ns2 dead:beef:2::2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 0 0 0
+	fi
+
+	# no subflow IPv6 to v4 address even if v6 has a valid v4 at the end
+	if reset "no JOIN with diff families v4-v6-2"; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 1
+		pm_nl_add_endpoint $ns2 dead:beef:2::10.0.3.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 0 0 0
+	fi
+
+	# no subflow IPv4 to v6 address, no need to slow down too then
+	if reset "no JOIN with diff families v6-v4"; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 1
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		run_tests $ns1 $ns2 dead:beef:1::1
+		chk_join_nr 0 0 0
+	fi
+}
+
+mixed_tests()
+{
+	if reset "IPv4 sockets do not use IPv6 addresses" &&
+	   continue_if mptcp_lib_kversion_ge 6.3; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 1 1
+		pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
+		speed=slow \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 0 0 0
+	fi
+
+	# Need an IPv6 mptcp socket to allow subflows of both families
+	if reset "simult IPv4 and IPv6 subflows" &&
+	   continue_if mptcp_lib_kversion_ge 6.3; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 1 1
+		pm_nl_add_endpoint $ns1 10.0.1.1 flags signal
+		speed=slow \
+			run_tests $ns1 $ns2 dead:beef:2::1
+		chk_join_nr 1 1 1
+	fi
+
+	# cross families subflows will not be created even in fullmesh mode
+	if reset "simult IPv4 and IPv6 subflows, fullmesh 1x1" &&
+	   continue_if mptcp_lib_kversion_ge 6.3; then
+		pm_nl_set_limits $ns1 0 4
+		pm_nl_set_limits $ns2 1 4
+		pm_nl_add_endpoint $ns2 dead:beef:2::2 flags subflow,fullmesh
+		pm_nl_add_endpoint $ns1 10.0.1.1 flags signal
+		speed=slow \
+			run_tests $ns1 $ns2 dead:beef:2::1
+		chk_join_nr 1 1 1
+	fi
+
+	# fullmesh still tries to create all the possibly subflows with
+	# matching family
+	if reset "simult IPv4 and IPv6 subflows, fullmesh 2x2" &&
+	   continue_if mptcp_lib_kversion_ge 6.3; then
+		pm_nl_set_limits $ns1 0 4
+		pm_nl_set_limits $ns2 2 4
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
+		fullmesh=1 speed=slow \
+			run_tests $ns1 $ns2 dead:beef:1::1
+		chk_join_nr 4 4 4
+	fi
+}
+
+backup_tests()
+{
+	# single subflow, backup
+	if reset "single subflow, backup" &&
+	   continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 1
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup
+		sflags=nobackup speed=slow \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 1 1 1
+		chk_prio_nr 0 1
+	fi
+
+	# single address, backup
+	if reset "single address, backup" &&
+	   continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		pm_nl_set_limits $ns2 1 1
+		sflags=backup speed=slow \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 1 1 1
+		chk_add_nr 1 1
+		chk_prio_nr 1 1
+	fi
+
+	# single address with port, backup
+	if reset "single address with port, backup" &&
+	   continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
+		pm_nl_set_limits $ns2 1 1
+		sflags=backup speed=slow \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 1 1 1
+		chk_add_nr 1 1
+		chk_prio_nr 1 1
+	fi
+
+	if reset "mpc backup" &&
+	   continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then
+		pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup
+		speed=slow \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 0 0 0
+		chk_prio_nr 0 1
+	fi
+
+	if reset "mpc backup both sides" &&
+	   continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then
+		pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow,backup
+		pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup
+		speed=slow \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 0 0 0
+		chk_prio_nr 1 1
+	fi
+
+	if reset "mpc switch to backup" &&
+	   continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then
+		pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow
+		sflags=backup speed=slow \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 0 0 0
+		chk_prio_nr 0 1
+	fi
+
+	if reset "mpc switch to backup both sides" &&
+	   continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then
+		pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow
+		pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow
+		sflags=backup speed=slow \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 0 0 0
+		chk_prio_nr 1 1
+	fi
+}
+
+verify_listener_events()
+{
+	local e_type=$2
+	local e_saddr=$4
+	local e_sport=$5
+	local name
+
+	if [ $e_type = $MPTCP_LIB_EVENT_LISTENER_CREATED ]; then
+		name="LISTENER_CREATED"
+	elif [ $e_type = $MPTCP_LIB_EVENT_LISTENER_CLOSED ]; then
+		name="LISTENER_CLOSED "
+	else
+		name="$e_type"
+	fi
+
+	print_check "$name $e_saddr:$e_sport"
+
+	if ! mptcp_lib_kallsyms_has "mptcp_event_pm_listener$"; then
+		print_skip "event not supported"
+		return
+	fi
+
+	if mptcp_lib_verify_listener_events "${@}"; then
+		print_ok
+		return 0
+	fi
+	fail_test
+}
+
+add_addr_ports_tests()
+{
+	# signal address with port
+	if reset "signal address with port"; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 1 1
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 1 1 1
+		chk_add_nr 1 1 1
+	fi
+
+	# subflow and signal with port
+	if reset "subflow and signal with port"; then
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_set_limits $ns2 1 2
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 2 2 2
+		chk_add_nr 1 1 1
+	fi
+
+	# single address with port, remove
+	# pm listener events
+	if reset_with_events "remove single address with port"; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
+		pm_nl_set_limits $ns2 1 1
+		addr_nr_ns1=-1 speed=slow \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 1 1 1
+		chk_add_nr 1 1 1
+		chk_rm_nr 1 1 invert
+
+		verify_listener_events $evts_ns1 $MPTCP_LIB_EVENT_LISTENER_CREATED \
+				       $MPTCP_LIB_AF_INET 10.0.2.1 10100
+		verify_listener_events $evts_ns1 $MPTCP_LIB_EVENT_LISTENER_CLOSED \
+				       $MPTCP_LIB_AF_INET 10.0.2.1 10100
+		kill_events_pids
+	fi
+
+	# subflow and signal with port, remove
+	if reset "remove subflow and signal with port"; then
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
+		pm_nl_set_limits $ns2 1 2
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		addr_nr_ns1=-1 addr_nr_ns2=-1 speed=slow \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 2 2 2
+		chk_add_nr 1 1 1
+		chk_rm_nr 1 1
+	fi
+
+	# subflows and signal with port, flush
+	if reset "flush subflows and signal with port"; then
+		pm_nl_set_limits $ns1 0 3
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
+		pm_nl_set_limits $ns2 1 3
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
+		addr_nr_ns1=-8 addr_nr_ns2=-2 speed=slow \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 3 3 3
+		chk_add_nr 1 1
+		chk_rm_nr 1 3 invert simult
+	fi
+
+	# multiple addresses with port
+	if reset "multiple addresses with port"; then
+		pm_nl_set_limits $ns1 2 2
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
+		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal port 10100
+		pm_nl_set_limits $ns2 2 2
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 2 2 2
+		chk_add_nr 2 2 2
+	fi
+
+	# multiple addresses with ports
+	if reset "multiple addresses with ports"; then
+		pm_nl_set_limits $ns1 2 2
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
+		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal port 10101
+		pm_nl_set_limits $ns2 2 2
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 2 2 2
+		chk_add_nr 2 2 2
+	fi
+}
+
+syncookies_tests()
+{
+	# single subflow, syncookies
+	if reset_with_cookies "single subflow with syn cookies"; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 1
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 1 1 1
+	fi
+
+	# multiple subflows with syn cookies
+	if reset_with_cookies "multiple subflows with syn cookies"; then
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_set_limits $ns2 0 2
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 2 2 2
+	fi
+
+	# multiple subflows limited by server
+	if reset_with_cookies "subflows limited by server w cookies"; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 2
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 2 1 1
+	fi
+
+	# test signal address with cookies
+	if reset_with_cookies "signal address with syn cookies"; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 1 1
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 1 1 1
+		chk_add_nr 1 1
+	fi
+
+	# test cookie with subflow and signal
+	if reset_with_cookies "subflow and signal w cookies"; then
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_set_limits $ns2 1 2
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 2 2 2
+		chk_add_nr 1 1
+	fi
+
+	# accept and use add_addr with additional subflows
+	if reset_with_cookies "subflows and signal w. cookies"; then
+		pm_nl_set_limits $ns1 0 3
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		pm_nl_set_limits $ns2 1 3
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 3 3 3
+		chk_add_nr 1 1
+	fi
+}
+
+checksum_tests()
+{
+	# checksum test 0 0
+	if reset_with_checksum 0 0; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 1
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 0 0 0
+	fi
+
+	# checksum test 1 1
+	if reset_with_checksum 1 1; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 1
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 0 0 0
+	fi
+
+	# checksum test 0 1
+	if reset_with_checksum 0 1; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 1
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 0 0 0
+	fi
+
+	# checksum test 1 0
+	if reset_with_checksum 1 0; then
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 1
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 0 0 0
+	fi
+}
+
+deny_join_id0_tests()
+{
+	# subflow allow join id0 ns1
+	if reset_with_allow_join_id0 "single subflow allow join id0 ns1" 1 0; then
+		pm_nl_set_limits $ns1 1 1
+		pm_nl_set_limits $ns2 1 1
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 1 1 1
+	fi
+
+	# subflow allow join id0 ns2
+	if reset_with_allow_join_id0 "single subflow allow join id0 ns2" 0 1; then
+		pm_nl_set_limits $ns1 1 1
+		pm_nl_set_limits $ns2 1 1
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 0 0 0
+	fi
+
+	# signal address allow join id0 ns1
+	# ADD_ADDRs are not affected by allow_join_id0 value.
+	if reset_with_allow_join_id0 "signal address allow join id0 ns1" 1 0; then
+		pm_nl_set_limits $ns1 1 1
+		pm_nl_set_limits $ns2 1 1
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 1 1 1
+		chk_add_nr 1 1
+	fi
+
+	# signal address allow join id0 ns2
+	# ADD_ADDRs are not affected by allow_join_id0 value.
+	if reset_with_allow_join_id0 "signal address allow join id0 ns2" 0 1; then
+		pm_nl_set_limits $ns1 1 1
+		pm_nl_set_limits $ns2 1 1
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 1 1 1
+		chk_add_nr 1 1
+	fi
+
+	# subflow and address allow join id0 ns1
+	if reset_with_allow_join_id0 "subflow and address allow join id0 1" 1 0; then
+		pm_nl_set_limits $ns1 2 2
+		pm_nl_set_limits $ns2 2 2
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 2 2 2
+	fi
+
+	# subflow and address allow join id0 ns2
+	if reset_with_allow_join_id0 "subflow and address allow join id0 2" 0 1; then
+		pm_nl_set_limits $ns1 2 2
+		pm_nl_set_limits $ns2 2 2
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 1 1 1
+	fi
+}
+
+fullmesh_tests()
+{
+	# fullmesh 1
+	# 2 fullmesh addrs in ns2, added before the connection,
+	# 1 non-fullmesh addr in ns1, added during the connection.
+	if reset "fullmesh test 2x1"; then
+		pm_nl_set_limits $ns1 0 4
+		pm_nl_set_limits $ns2 1 4
+		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,fullmesh
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,fullmesh
+		addr_nr_ns1=1 speed=slow \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 4 4 4
+		chk_add_nr 1 1
+	fi
+
+	# fullmesh 2
+	# 1 non-fullmesh addr in ns1, added before the connection,
+	# 1 fullmesh addr in ns2, added during the connection.
+	if reset "fullmesh test 1x1"; then
+		pm_nl_set_limits $ns1 1 3
+		pm_nl_set_limits $ns2 1 3
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		fullmesh=1 speed=slow \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 3 3 3
+		chk_add_nr 1 1
+	fi
+
+	# fullmesh 3
+	# 1 non-fullmesh addr in ns1, added before the connection,
+	# 2 fullmesh addrs in ns2, added during the connection.
+	if reset "fullmesh test 1x2"; then
+		pm_nl_set_limits $ns1 2 5
+		pm_nl_set_limits $ns2 1 5
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		fullmesh=2 speed=slow \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 5 5 5
+		chk_add_nr 1 1
+	fi
+
+	# fullmesh 4
+	# 1 non-fullmesh addr in ns1, added before the connection,
+	# 2 fullmesh addrs in ns2, added during the connection,
+	# limit max_subflows to 4.
+	if reset "fullmesh test 1x2, limited"; then
+		pm_nl_set_limits $ns1 2 4
+		pm_nl_set_limits $ns2 1 4
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		fullmesh=2 speed=slow \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 4 4 4
+		chk_add_nr 1 1
+	fi
+
+	# set fullmesh flag
+	if reset "set fullmesh flag test" &&
+	   continue_if mptcp_lib_kversion_ge 5.18; then
+		pm_nl_set_limits $ns1 4 4
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow
+		pm_nl_set_limits $ns2 4 4
+		addr_nr_ns2=1 sflags=fullmesh speed=slow \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 2 2 2
+		chk_rm_nr 0 1
+	fi
+
+	# set nofullmesh flag
+	if reset "set nofullmesh flag test" &&
+	   continue_if mptcp_lib_kversion_ge 5.18; then
+		pm_nl_set_limits $ns1 4 4
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow,fullmesh
+		pm_nl_set_limits $ns2 4 4
+		fullmesh=1 sflags=nofullmesh speed=slow \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 2 2 2
+		chk_rm_nr 0 1
+	fi
+
+	# set backup,fullmesh flags
+	if reset "set backup,fullmesh flags test" &&
+	   continue_if mptcp_lib_kversion_ge 5.18; then
+		pm_nl_set_limits $ns1 4 4
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow
+		pm_nl_set_limits $ns2 4 4
+		addr_nr_ns2=1 sflags=backup,fullmesh speed=slow \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 2 2 2
+		chk_prio_nr 0 1
+		chk_rm_nr 0 1
+	fi
+
+	# set nobackup,nofullmesh flags
+	if reset "set nobackup,nofullmesh flags test" &&
+	   continue_if mptcp_lib_kversion_ge 5.18; then
+		pm_nl_set_limits $ns1 4 4
+		pm_nl_set_limits $ns2 4 4
+		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,backup,fullmesh
+		sflags=nobackup,nofullmesh speed=slow \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 2 2 2
+		chk_prio_nr 0 1
+		chk_rm_nr 0 1
+	fi
+}
+
+fastclose_tests()
+{
+	if reset_check_counter "fastclose test" "MPTcpExtMPFastcloseTx"; then
+		test_linkfail=1024 fastclose=client \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 0 0 0
+		chk_fclose_nr 1 1
+		chk_rst_nr 1 1 invert
+	fi
+
+	if reset_check_counter "fastclose server test" "MPTcpExtMPFastcloseRx"; then
+		test_linkfail=1024 fastclose=server \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 0 0 0 0 0 0 1
+		chk_fclose_nr 1 1 invert
+		chk_rst_nr 1 1
+	fi
+}
+
+pedit_action_pkts()
+{
+	tc -n $ns2 -j -s action show action pedit index 100 | \
+		mptcp_lib_get_info_value \"packets\" packets
+}
+
+fail_tests()
+{
+	# single subflow
+	if reset_with_fail "Infinite map" 1; then
+		test_linkfail=128 \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 0 0 0 +1 +0 1 0 1 "$(pedit_action_pkts)"
+		chk_fail_nr 1 -1 invert
+	fi
+
+	# multiple subflows
+	if reset_with_fail "MP_FAIL MP_RST" 2; then
+		tc -n $ns2 qdisc add dev ns2eth1 root netem rate 1mbit delay 5
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 1
+		pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow
+		test_linkfail=1024 \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 1 1 1 1 0 1 1 0 "$(pedit_action_pkts)"
+	fi
+}
+
+# $1: ns ; $2: addr ; $3: id
+userspace_pm_add_addr()
+{
+	local evts=$evts_ns1
+	local tk
+
+	[ "$1" == "$ns2" ] && evts=$evts_ns2
+	tk=$(mptcp_lib_evts_get_info token "$evts")
+
+	ip netns exec $1 ./pm_nl_ctl ann $2 token $tk id $3
+	sleep 1
+}
+
+# $1: ns ; $2: id
+userspace_pm_rm_addr()
+{
+	local evts=$evts_ns1
+	local tk
+	local cnt
+
+	[ "$1" == "$ns2" ] && evts=$evts_ns2
+	tk=$(mptcp_lib_evts_get_info token "$evts")
+
+	cnt=$(rm_addr_count ${1})
+	ip netns exec $1 ./pm_nl_ctl rem token $tk id $2
+	wait_rm_addr $1 "${cnt}"
+}
+
+# $1: ns ; $2: addr ; $3: id
+userspace_pm_add_sf()
+{
+	local evts=$evts_ns1
+	local tk da dp
+
+	[ "$1" == "$ns2" ] && evts=$evts_ns2
+	tk=$(mptcp_lib_evts_get_info token "$evts")
+	da=$(mptcp_lib_evts_get_info daddr4 "$evts")
+	dp=$(mptcp_lib_evts_get_info dport "$evts")
+
+	ip netns exec $1 ./pm_nl_ctl csf lip $2 lid $3 \
+				rip $da rport $dp token $tk
+	sleep 1
+}
+
+# $1: ns ; $2: addr $3: event type
+userspace_pm_rm_sf()
+{
+	local evts=$evts_ns1
+	local t=${3:-1}
+	local ip
+	local tk da dp sp
+	local cnt
+
+	[ "$1" == "$ns2" ] && evts=$evts_ns2
+	[ -n "$(mptcp_lib_evts_get_info "saddr4" "$evts" $t)" ] && ip=4
+	[ -n "$(mptcp_lib_evts_get_info "saddr6" "$evts" $t)" ] && ip=6
+	tk=$(mptcp_lib_evts_get_info token "$evts")
+	da=$(mptcp_lib_evts_get_info "daddr$ip" "$evts" $t $2)
+	dp=$(mptcp_lib_evts_get_info dport "$evts" $t $2)
+	sp=$(mptcp_lib_evts_get_info sport "$evts" $t $2)
+
+	cnt=$(rm_sf_count ${1})
+	ip netns exec $1 ./pm_nl_ctl dsf lip $2 lport $sp \
+				rip $da rport $dp token $tk
+	wait_rm_sf $1 "${cnt}"
+}
+
+check_output()
+{
+	local cmd="$1"
+	local expected="$2"
+	local msg="$3"
+	local rc=0
+
+	mptcp_lib_check_output "${err}" "${cmd}" "${expected}" || rc=${?}
+	if [ ${rc} -eq 2 ]; then
+		fail_test "fail to check output # error ${rc}"
+	elif [ ${rc} -eq 0 ]; then
+		print_ok
+	elif [ ${rc} -eq 1 ]; then
+		fail_test "fail to check output # different output"
+	fi
+}
+
+# $1: ns
+userspace_pm_dump()
+{
+	local evts=$evts_ns1
+	local tk
+
+	[ "$1" == "$ns2" ] && evts=$evts_ns2
+	tk=$(mptcp_lib_evts_get_info token "$evts")
+
+	ip netns exec $1 ./pm_nl_ctl dump token $tk
+}
+
+# $1: ns ; $2: id
+userspace_pm_get_addr()
+{
+	local evts=$evts_ns1
+	local tk
+
+	[ "$1" == "$ns2" ] && evts=$evts_ns2
+	tk=$(mptcp_lib_evts_get_info token "$evts")
+
+	ip netns exec $1 ./pm_nl_ctl get $2 token $tk
+}
+
+userspace_pm_chk_dump_addr()
+{
+	local ns="${1}"
+	local exp="${2}"
+	local check="${3}"
+
+	print_check "dump addrs ${check}"
+
+	if mptcp_lib_kallsyms_has "mptcp_userspace_pm_dump_addr$"; then
+		check_output "userspace_pm_dump ${ns}" "${exp}"
+	else
+		print_skip
+	fi
+}
+
+userspace_pm_chk_get_addr()
+{
+	local ns="${1}"
+	local id="${2}"
+	local exp="${3}"
+
+	print_check "get id ${id} addr"
+
+	if mptcp_lib_kallsyms_has "mptcp_userspace_pm_get_addr$"; then
+		check_output "userspace_pm_get_addr ${ns} ${id}" "${exp}"
+	else
+		print_skip
+	fi
+}
+
+userspace_tests()
+{
+	# userspace pm type prevents add_addr
+	if reset "userspace pm type prevents add_addr" &&
+	   continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
+		set_userspace_pm $ns1
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_set_limits $ns2 0 2
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 0 0 0
+		chk_add_nr 0 0
+	fi
+
+	# userspace pm type does not echo add_addr without daemon
+	if reset "userspace pm no echo w/o daemon" &&
+	   continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
+		set_userspace_pm $ns2
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_set_limits $ns2 0 2
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 0 0 0
+		chk_add_nr 1 0
+	fi
+
+	# userspace pm type rejects join
+	if reset "userspace pm type rejects join" &&
+	   continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
+		set_userspace_pm $ns1
+		pm_nl_set_limits $ns1 1 1
+		pm_nl_set_limits $ns2 1 1
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 1 1 0
+	fi
+
+	# userspace pm type does not send join
+	if reset "userspace pm type does not send join" &&
+	   continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
+		set_userspace_pm $ns2
+		pm_nl_set_limits $ns1 1 1
+		pm_nl_set_limits $ns2 1 1
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 0 0 0
+	fi
+
+	# userspace pm type prevents mp_prio
+	if reset "userspace pm type prevents mp_prio" &&
+	   continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
+		set_userspace_pm $ns1
+		pm_nl_set_limits $ns1 1 1
+		pm_nl_set_limits $ns2 1 1
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		sflags=backup speed=slow \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 1 1 0
+		chk_prio_nr 0 0
+	fi
+
+	# userspace pm type prevents rm_addr
+	if reset "userspace pm type prevents rm_addr" &&
+	   continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
+		set_userspace_pm $ns1
+		set_userspace_pm $ns2
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 1
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		addr_nr_ns2=-1 speed=slow \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 0 0 0
+		chk_rm_nr 0 0
+	fi
+
+	# userspace pm add & remove address
+	if reset_with_events "userspace pm add & remove address" &&
+	   continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
+		set_userspace_pm $ns1
+		pm_nl_set_limits $ns2 2 2
+		speed=5 \
+			run_tests $ns1 $ns2 10.0.1.1 &
+		local tests_pid=$!
+		wait_mpj $ns1
+		userspace_pm_add_addr $ns1 10.0.2.1 10
+		userspace_pm_add_addr $ns1 10.0.3.1 20
+		chk_join_nr 2 2 2
+		chk_add_nr 2 2
+		chk_mptcp_info subflows 2 subflows 2
+		chk_subflows_total 3 3
+		chk_mptcp_info add_addr_signal 2 add_addr_accepted 2
+		userspace_pm_chk_dump_addr "${ns1}" \
+			$'id 10 flags signal 10.0.2.1\nid 20 flags signal 10.0.3.1' \
+			"signal"
+		userspace_pm_chk_get_addr "${ns1}" "10" "id 10 flags signal 10.0.2.1"
+		userspace_pm_chk_get_addr "${ns1}" "20" "id 20 flags signal 10.0.3.1"
+		userspace_pm_rm_addr $ns1 10
+		userspace_pm_rm_sf $ns1 "::ffff:10.0.2.1" $MPTCP_LIB_EVENT_SUB_ESTABLISHED
+		userspace_pm_chk_dump_addr "${ns1}" \
+			"id 20 flags signal 10.0.3.1" "after rm_addr 10"
+		userspace_pm_rm_addr $ns1 20
+		userspace_pm_rm_sf $ns1 10.0.3.1 $MPTCP_LIB_EVENT_SUB_ESTABLISHED
+		userspace_pm_chk_dump_addr "${ns1}" "" "after rm_addr 20"
+		chk_rm_nr 2 2 invert
+		chk_mptcp_info subflows 0 subflows 0
+		chk_subflows_total 1 1
+		kill_events_pids
+		mptcp_lib_kill_wait $tests_pid
+	fi
+
+	# userspace pm create destroy subflow
+	if reset_with_events "userspace pm create destroy subflow" &&
+	   continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
+		set_userspace_pm $ns2
+		pm_nl_set_limits $ns1 0 1
+		speed=5 \
+			run_tests $ns1 $ns2 10.0.1.1 &
+		local tests_pid=$!
+		wait_mpj $ns2
+		userspace_pm_add_sf $ns2 10.0.3.2 20
+		chk_join_nr 1 1 1
+		chk_mptcp_info subflows 1 subflows 1
+		chk_subflows_total 2 2
+		userspace_pm_chk_dump_addr "${ns2}" \
+			"id 20 flags subflow 10.0.3.2" \
+			"subflow"
+		userspace_pm_chk_get_addr "${ns2}" "20" "id 20 flags subflow 10.0.3.2"
+		userspace_pm_rm_addr $ns2 20
+		userspace_pm_rm_sf $ns2 10.0.3.2 $MPTCP_LIB_EVENT_SUB_ESTABLISHED
+		userspace_pm_chk_dump_addr "${ns2}" \
+			"" \
+			"after rm_addr 20"
+		chk_rm_nr 1 1
+		chk_mptcp_info subflows 0 subflows 0
+		chk_subflows_total 1 1
+		kill_events_pids
+		mptcp_lib_kill_wait $tests_pid
+	fi
+
+	# userspace pm create id 0 subflow
+	if reset_with_events "userspace pm create id 0 subflow" &&
+	   continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
+		set_userspace_pm $ns2
+		pm_nl_set_limits $ns1 0 1
+		speed=5 \
+			run_tests $ns1 $ns2 10.0.1.1 &
+		local tests_pid=$!
+		wait_mpj $ns2
+		chk_mptcp_info subflows 0 subflows 0
+		chk_subflows_total 1 1
+		userspace_pm_add_sf $ns2 10.0.3.2 0
+		userspace_pm_chk_dump_addr "${ns2}" \
+			"id 0 flags subflow 10.0.3.2" "id 0 subflow"
+		chk_join_nr 1 1 1
+		chk_mptcp_info subflows 1 subflows 1
+		chk_subflows_total 2 2
+		kill_events_pids
+		mptcp_lib_kill_wait $tests_pid
+	fi
+
+	# userspace pm remove initial subflow
+	if reset_with_events "userspace pm remove initial subflow" &&
+	   continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
+		set_userspace_pm $ns2
+		pm_nl_set_limits $ns1 0 1
+		speed=5 \
+			run_tests $ns1 $ns2 10.0.1.1 &
+		local tests_pid=$!
+		wait_mpj $ns2
+		userspace_pm_add_sf $ns2 10.0.3.2 20
+		chk_join_nr 1 1 1
+		chk_mptcp_info subflows 1 subflows 1
+		chk_subflows_total 2 2
+		userspace_pm_rm_sf $ns2 10.0.1.2
+		# we don't look at the counter linked to the RM_ADDR but
+		# to the one linked to the subflows that have been removed
+		chk_rm_nr 0 1
+		chk_rst_nr 0 0 invert
+		chk_mptcp_info subflows 1 subflows 1
+		chk_subflows_total 1 1
+		kill_events_pids
+		mptcp_lib_kill_wait $tests_pid
+	fi
+
+	# userspace pm send RM_ADDR for ID 0
+	if reset_with_events "userspace pm send RM_ADDR for ID 0" &&
+	   continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
+		set_userspace_pm $ns1
+		pm_nl_set_limits $ns2 1 1
+		speed=5 \
+			run_tests $ns1 $ns2 10.0.1.1 &
+		local tests_pid=$!
+		wait_mpj $ns1
+		userspace_pm_add_addr $ns1 10.0.2.1 10
+		chk_join_nr 1 1 1
+		chk_add_nr 1 1
+		chk_mptcp_info subflows 1 subflows 1
+		chk_subflows_total 2 2
+		chk_mptcp_info add_addr_signal 1 add_addr_accepted 1
+		userspace_pm_rm_addr $ns1 0
+		# we don't look at the counter linked to the subflows that
+		# have been removed but to the one linked to the RM_ADDR
+		chk_rm_nr 1 0 invert
+		chk_rst_nr 0 0 invert
+		chk_mptcp_info subflows 1 subflows 1
+		chk_subflows_total 1 1
+		kill_events_pids
+		mptcp_lib_kill_wait $tests_pid
+	fi
+}
+
+endpoint_tests()
+{
+	# subflow_rebuild_header is needed to support the implicit flag
+	# userspace pm type prevents add_addr
+	if reset "implicit EP" &&
+	   mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
+		pm_nl_set_limits $ns1 2 2
+		pm_nl_set_limits $ns2 2 2
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		speed=slow \
+			run_tests $ns1 $ns2 10.0.1.1 &
+		local tests_pid=$!
+
+		wait_mpj $ns1
+		pm_nl_check_endpoint "creation" \
+			$ns2 10.0.2.2 id 1 flags implicit
+		chk_mptcp_info subflows 1 subflows 1
+		chk_mptcp_info add_addr_signal 1 add_addr_accepted 1
+
+		pm_nl_add_endpoint $ns2 10.0.2.2 id 33 2>/dev/null
+		pm_nl_check_endpoint "ID change is prevented" \
+			$ns2 10.0.2.2 id 1 flags implicit
+
+		pm_nl_add_endpoint $ns2 10.0.2.2 flags signal
+		pm_nl_check_endpoint "modif is allowed" \
+			$ns2 10.0.2.2 id 1 flags signal
+		mptcp_lib_kill_wait $tests_pid
+	fi
+
+	if reset "delete and re-add" &&
+	   mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
+		pm_nl_set_limits $ns1 1 1
+		pm_nl_set_limits $ns2 1 1
+		pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow
+		test_linkfail=4 speed=20 \
+			run_tests $ns1 $ns2 10.0.1.1 &
+		local tests_pid=$!
+
+		wait_mpj $ns2
+		chk_subflow_nr "before delete" 2
+		chk_mptcp_info subflows 1 subflows 1
+
+		pm_nl_del_endpoint $ns2 2 10.0.2.2
+		sleep 0.5
+		chk_subflow_nr "after delete" 1
+		chk_mptcp_info subflows 0 subflows 0
+
+		pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow
+		wait_mpj $ns2
+		chk_subflow_nr "after re-add" 2
+		chk_mptcp_info subflows 1 subflows 1
+		mptcp_lib_kill_wait $tests_pid
+	fi
+}
+
+# [$1: error message]
+usage()
+{
+	if [ -n "${1}" ]; then
+		echo "${1}"
+		ret=${KSFT_FAIL}
+	fi
+
+	echo "mptcp_join usage:"
+
+	local key
+	for key in "${!all_tests[@]}"; do
+		echo "  -${key} ${all_tests[${key}]}"
+	done
+
+	echo "  -c capture pcap files"
+	echo "  -C enable data checksum"
+	echo "  -i use ip mptcp"
+	echo "  -h help"
+
+	echo "[test ids|names]"
+
+	exit ${ret}
+}
+
+
+# Use a "simple" array to force an specific order we cannot have with an associative one
+all_tests_sorted=(
+	f@subflows_tests
+	e@subflows_error_tests
+	s@signal_address_tests
+	l@link_failure_tests
+	t@add_addr_timeout_tests
+	r@remove_tests
+	a@add_tests
+	6@ipv6_tests
+	4@v4mapped_tests
+	M@mixed_tests
+	b@backup_tests
+	p@add_addr_ports_tests
+	k@syncookies_tests
+	S@checksum_tests
+	d@deny_join_id0_tests
+	m@fullmesh_tests
+	z@fastclose_tests
+	F@fail_tests
+	u@userspace_tests
+	I@endpoint_tests
+)
+
+all_tests_args=""
+all_tests_names=()
+for subtests in "${all_tests_sorted[@]}"; do
+	key="${subtests%@*}"
+	value="${subtests#*@}"
+
+	all_tests_args+="${key}"
+	all_tests_names+=("${value}")
+	all_tests[${key}]="${value}"
+done
+
+tests=()
+while getopts "${all_tests_args}cCih" opt; do
+	case $opt in
+		["${all_tests_args}"])
+			tests+=("${all_tests[${opt}]}")
+			;;
+		c)
+			capture=true
+			;;
+		C)
+			checksum=true
+			;;
+		i)
+			ip_mptcp=1
+			;;
+		h)
+			usage
+			;;
+		*)
+			usage "Unknown option: -${opt}"
+			;;
+	esac
+done
+
+shift $((OPTIND - 1))
+
+for arg in "${@}"; do
+	if [[ "${arg}" =~ ^[0-9]+$ ]]; then
+		only_tests_ids+=("${arg}")
+	else
+		only_tests_names+=("${arg}")
+	fi
+done
+
+if [ ${#tests[@]} -eq 0 ]; then
+	tests=("${all_tests_names[@]}")
+fi
+
+for subtests in "${tests[@]}"; do
+	"${subtests}"
+done
+
+if [ ${ret} -ne 0 ]; then
+	echo
+	echo "${#failed_tests[@]} failure(s) has(ve) been detected:"
+	for i in $(get_failed_tests_ids); do
+		echo -e "\t- ${i}: ${failed_tests[${i}]}"
+	done
+	echo
+fi
+
+append_prev_results
+mptcp_lib_result_print_all_tap
 
 exit $ret
diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
new file mode 100644
index 000000000000..d529b4b37af8
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
@@ -0,0 +1,507 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+readonly KSFT_PASS=0
+readonly KSFT_FAIL=1
+readonly KSFT_SKIP=4
+
+# shellcheck disable=SC2155 # declare and assign separately
+readonly KSFT_TEST="${MPTCP_LIB_KSFT_TEST:-$(basename "${0}" .sh)}"
+
+# These variables are used in some selftests, read-only
+declare -rx MPTCP_LIB_EVENT_ANNOUNCED=6         # MPTCP_EVENT_ANNOUNCED
+declare -rx MPTCP_LIB_EVENT_REMOVED=7           # MPTCP_EVENT_REMOVED
+declare -rx MPTCP_LIB_EVENT_SUB_ESTABLISHED=10  # MPTCP_EVENT_SUB_ESTABLISHED
+declare -rx MPTCP_LIB_EVENT_SUB_CLOSED=11       # MPTCP_EVENT_SUB_CLOSED
+declare -rx MPTCP_LIB_EVENT_LISTENER_CREATED=15 # MPTCP_EVENT_LISTENER_CREATED
+declare -rx MPTCP_LIB_EVENT_LISTENER_CLOSED=16  # MPTCP_EVENT_LISTENER_CLOSED
+
+declare -rx MPTCP_LIB_AF_INET=2
+declare -rx MPTCP_LIB_AF_INET6=10
+
+MPTCP_LIB_SUBTESTS=()
+MPTCP_LIB_SUBTESTS_DUPLICATED=0
+MPTCP_LIB_TEST_COUNTER=0
+MPTCP_LIB_TEST_FORMAT="%02u %-50s"
+
+# only if supported (or forced) and not disabled, see no-color.org
+if { [ -t 1 ] || [ "${SELFTESTS_MPTCP_LIB_COLOR_FORCE:-}" = "1" ]; } &&
+   [ "${NO_COLOR:-}" != "1" ]; then
+	readonly MPTCP_LIB_COLOR_RED="\E[1;31m"
+	readonly MPTCP_LIB_COLOR_GREEN="\E[1;32m"
+	readonly MPTCP_LIB_COLOR_YELLOW="\E[1;33m"
+	readonly MPTCP_LIB_COLOR_BLUE="\E[1;34m"
+	readonly MPTCP_LIB_COLOR_RESET="\E[0m"
+else
+	readonly MPTCP_LIB_COLOR_RED=
+	readonly MPTCP_LIB_COLOR_GREEN=
+	readonly MPTCP_LIB_COLOR_YELLOW=
+	readonly MPTCP_LIB_COLOR_BLUE=
+	readonly MPTCP_LIB_COLOR_RESET=
+fi
+
+# $1: color, $2: text
+mptcp_lib_print_color() {
+	echo -e "${MPTCP_LIB_START_PRINT:-}${*}${MPTCP_LIB_COLOR_RESET}"
+}
+
+mptcp_lib_print_ok() {
+	mptcp_lib_print_color "${MPTCP_LIB_COLOR_GREEN}${*}"
+}
+
+mptcp_lib_print_warn() {
+	mptcp_lib_print_color "${MPTCP_LIB_COLOR_YELLOW}${*}"
+}
+
+mptcp_lib_print_info() {
+	mptcp_lib_print_color "${MPTCP_LIB_COLOR_BLUE}${*}"
+}
+
+mptcp_lib_print_err() {
+	mptcp_lib_print_color "${MPTCP_LIB_COLOR_RED}${*}"
+}
+
+# shellcheck disable=SC2120 # parameters are optional
+mptcp_lib_pr_ok() {
+	mptcp_lib_print_ok "[ OK ]${1:+ ${*}}"
+}
+
+mptcp_lib_pr_skip() {
+	mptcp_lib_print_warn "[SKIP]${1:+ ${*}}"
+}
+
+mptcp_lib_pr_fail() {
+	mptcp_lib_print_err "[FAIL]${1:+ ${*}}"
+}
+
+mptcp_lib_pr_info() {
+	mptcp_lib_print_info "INFO: ${*}"
+}
+
+# SELFTESTS_MPTCP_LIB_EXPECT_ALL_FEATURES env var can be set when validating all
+# features using the last version of the kernel and the selftests to make sure
+# a test is not being skipped by mistake.
+mptcp_lib_expect_all_features() {
+	[ "${SELFTESTS_MPTCP_LIB_EXPECT_ALL_FEATURES:-}" = "1" ]
+}
+
+# $1: msg
+mptcp_lib_fail_if_expected_feature() {
+	if mptcp_lib_expect_all_features; then
+		echo "ERROR: missing feature: ${*}"
+		exit ${KSFT_FAIL}
+	fi
+
+	return 1
+}
+
+# $1: file
+mptcp_lib_has_file() {
+	local f="${1}"
+
+	if [ -f "${f}" ]; then
+		return 0
+	fi
+
+	mptcp_lib_fail_if_expected_feature "${f} file not found"
+}
+
+mptcp_lib_check_mptcp() {
+	if ! mptcp_lib_has_file "/proc/sys/net/mptcp/enabled"; then
+		mptcp_lib_pr_skip "MPTCP support is not available"
+		exit ${KSFT_SKIP}
+	fi
+}
+
+mptcp_lib_check_kallsyms() {
+	if ! mptcp_lib_has_file "/proc/kallsyms"; then
+		mptcp_lib_pr_skip "CONFIG_KALLSYMS is missing"
+		exit ${KSFT_SKIP}
+	fi
+}
+
+# Internal: use mptcp_lib_kallsyms_has() instead
+__mptcp_lib_kallsyms_has() {
+	local sym="${1}"
+
+	mptcp_lib_check_kallsyms
+
+	grep -q " ${sym}" /proc/kallsyms
+}
+
+# $1: part of a symbol to look at, add '$' at the end for full name
+mptcp_lib_kallsyms_has() {
+	local sym="${1}"
+
+	if __mptcp_lib_kallsyms_has "${sym}"; then
+		return 0
+	fi
+
+	mptcp_lib_fail_if_expected_feature "${sym} symbol not found"
+}
+
+# $1: part of a symbol to look at, add '$' at the end for full name
+mptcp_lib_kallsyms_doesnt_have() {
+	local sym="${1}"
+
+	if ! __mptcp_lib_kallsyms_has "${sym}"; then
+		return 0
+	fi
+
+	mptcp_lib_fail_if_expected_feature "${sym} symbol has been found"
+}
+
+# !!!AVOID USING THIS!!!
+# Features might not land in the expected version and features can be backported
+#
+# $1: kernel version, e.g. 6.3
+mptcp_lib_kversion_ge() {
+	local exp_maj="${1%.*}"
+	local exp_min="${1#*.}"
+	local v maj min
+
+	# If the kernel has backported features, set this env var to 1:
+	if [ "${SELFTESTS_MPTCP_LIB_NO_KVERSION_CHECK:-}" = "1" ]; then
+		return 0
+	fi
+
+	v=$(uname -r | cut -d'.' -f1,2)
+	maj=${v%.*}
+	min=${v#*.}
+
+	if   [ "${maj}" -gt "${exp_maj}" ] ||
+	   { [ "${maj}" -eq "${exp_maj}" ] && [ "${min}" -ge "${exp_min}" ]; }; then
+		return 0
+	fi
+
+	mptcp_lib_fail_if_expected_feature "kernel version ${1} lower than ${v}"
+}
+
+__mptcp_lib_result_check_duplicated() {
+	local subtest
+
+	for subtest in "${MPTCP_LIB_SUBTESTS[@]}"; do
+		if [[ "${subtest}" == *" - ${KSFT_TEST}: ${*%% #*}" ]]; then
+			MPTCP_LIB_SUBTESTS_DUPLICATED=1
+			mptcp_lib_print_err "Duplicated entry: ${*}"
+			break
+		fi
+	done
+}
+
+__mptcp_lib_result_add() {
+	local result="${1}"
+	shift
+
+	local id=$((${#MPTCP_LIB_SUBTESTS[@]} + 1))
+
+	__mptcp_lib_result_check_duplicated "${*}"
+
+	MPTCP_LIB_SUBTESTS+=("${result} ${id} - ${KSFT_TEST}: ${*}")
+}
+
+# $1: test name
+mptcp_lib_result_pass() {
+	__mptcp_lib_result_add "ok" "${1}"
+}
+
+# $1: test name
+mptcp_lib_result_fail() {
+	__mptcp_lib_result_add "not ok" "${1}"
+}
+
+# $1: test name
+mptcp_lib_result_skip() {
+	__mptcp_lib_result_add "ok" "${1} # SKIP"
+}
+
+# $1: result code ; $2: test name
+mptcp_lib_result_code() {
+	local ret="${1}"
+	local name="${2}"
+
+	case "${ret}" in
+		"${KSFT_PASS}")
+			mptcp_lib_result_pass "${name}"
+			;;
+		"${KSFT_FAIL}")
+			mptcp_lib_result_fail "${name}"
+			;;
+		"${KSFT_SKIP}")
+			mptcp_lib_result_skip "${name}"
+			;;
+		*)
+			echo "ERROR: wrong result code: ${ret}"
+			exit ${KSFT_FAIL}
+			;;
+	esac
+}
+
+mptcp_lib_result_print_all_tap() {
+	local subtest
+
+	if [ ${#MPTCP_LIB_SUBTESTS[@]} -eq 0 ] ||
+	   [ "${SELFTESTS_MPTCP_LIB_NO_TAP:-}" = "1" ]; then
+		return
+	fi
+
+	printf "\nTAP version 13\n"
+	printf "1..%d\n" "${#MPTCP_LIB_SUBTESTS[@]}"
+
+	for subtest in "${MPTCP_LIB_SUBTESTS[@]}"; do
+		printf "%s\n" "${subtest}"
+	done
+
+	if [ "${MPTCP_LIB_SUBTESTS_DUPLICATED}" = 1 ] &&
+	   mptcp_lib_expect_all_features; then
+		mptcp_lib_print_err "Duplicated test entries"
+		exit ${KSFT_FAIL}
+	fi
+}
+
+# get the value of keyword $1 in the line marked by keyword $2
+mptcp_lib_get_info_value() {
+	grep "${2}" | sed -n 's/.*\('"${1}"':\)\([0-9a-f:.]*\).*$/\2/p;q'
+}
+
+# $1: info name ; $2: evts_ns ; [$3: event type; [$4: addr]]
+mptcp_lib_evts_get_info() {
+	grep "${4:-}" "${2}" | mptcp_lib_get_info_value "${1}" "^type:${3:-1},"
+}
+
+# $1: PID
+mptcp_lib_kill_wait() {
+	[ "${1}" -eq 0 ] && return 0
+
+	kill -SIGUSR1 "${1}" > /dev/null 2>&1
+	kill "${1}" > /dev/null 2>&1
+	wait "${1}" 2>/dev/null
+}
+
+# $1: IP address
+mptcp_lib_is_v6() {
+	[ -z "${1##*:*}" ]
+}
+
+# $1: ns, $2: MIB counter
+mptcp_lib_get_counter() {
+	local ns="${1}"
+	local counter="${2}"
+	local count
+
+	count=$(ip netns exec "${ns}" nstat -asz "${counter}" |
+		awk 'NR==1 {next} {print $2}')
+	if [ -z "${count}" ]; then
+		mptcp_lib_fail_if_expected_feature "${counter} counter"
+		return 1
+	fi
+
+	echo "${count}"
+}
+
+mptcp_lib_make_file() {
+	local name="${1}"
+	local bs="${2}"
+	local size="${3}"
+
+	dd if=/dev/urandom of="${name}" bs="${bs}" count="${size}" 2> /dev/null
+	echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "${name}"
+}
+
+# $1: file
+mptcp_lib_print_file_err() {
+	ls -l "${1}" 1>&2
+	echo "Trailing bytes are: "
+	tail -c 27 "${1}"
+}
+
+# $1: input file ; $2: output file ; $3: what kind of file
+mptcp_lib_check_transfer() {
+	local in="${1}"
+	local out="${2}"
+	local what="${3}"
+
+	if ! cmp "$in" "$out" > /dev/null 2>&1; then
+		mptcp_lib_pr_fail "$what does not match (in, out):"
+		mptcp_lib_print_file_err "$in"
+		mptcp_lib_print_file_err "$out"
+
+		return 1
+	fi
+
+	return 0
+}
+
+# $1: ns, $2: port
+mptcp_lib_wait_local_port_listen() {
+	local listener_ns="${1}"
+	local port="${2}"
+
+	local port_hex
+	port_hex="$(printf "%04X" "${port}")"
+
+	local _
+	for _ in $(seq 10); do
+		ip netns exec "${listener_ns}" cat /proc/net/tcp* | \
+			awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) \
+			     {rc=0; exit}} END {exit rc}" &&
+			break
+		sleep 0.1
+	done
+}
+
+mptcp_lib_check_output() {
+	local err="${1}"
+	local cmd="${2}"
+	local expected="${3}"
+	local cmd_ret=0
+	local out
+
+	if ! out=$(${cmd} 2>"${err}"); then
+		cmd_ret=${?}
+	fi
+
+	if [ ${cmd_ret} -ne 0 ]; then
+		mptcp_lib_pr_fail "command execution '${cmd}' stderr"
+		cat "${err}"
+		return 2
+	elif [ "${out}" = "${expected}" ]; then
+		return 0
+	else
+		mptcp_lib_pr_fail "expected '${expected}' got '${out}'"
+		return 1
+	fi
+}
+
+mptcp_lib_check_tools() {
+	local tool
+
+	for tool in "${@}"; do
+		case "${tool}" in
+		"ip")
+			if ! ip -Version &> /dev/null; then
+				mptcp_lib_pr_skip "Could not run test without ip tool"
+				exit ${KSFT_SKIP}
+			fi
+			;;
+		"ss")
+			if ! ss -h | grep -q MPTCP; then
+				mptcp_lib_pr_skip "ss tool does not support MPTCP"
+				exit ${KSFT_SKIP}
+			fi
+			;;
+		"iptables"* | "ip6tables"*)
+			if ! "${tool}" -V &> /dev/null; then
+				mptcp_lib_pr_skip "Could not run all tests without ${tool}"
+				exit ${KSFT_SKIP}
+			fi
+			;;
+		*)
+			mptcp_lib_pr_fail "Internal error: unsupported tool: ${tool}"
+			exit ${KSFT_FAIL}
+			;;
+		esac
+	done
+}
+
+mptcp_lib_ns_init() {
+	local sec rndh
+
+	sec=$(date +%s)
+	rndh=$(printf %x "${sec}")-$(mktemp -u XXXXXX)
+
+	local netns
+	for netns in "${@}"; do
+		eval "${netns}=${netns}-${rndh}"
+
+		ip netns add "${!netns}" || exit ${KSFT_SKIP}
+		ip -net "${!netns}" link set lo up
+		ip netns exec "${!netns}" sysctl -q net.mptcp.enabled=1
+		ip netns exec "${!netns}" sysctl -q net.ipv4.conf.all.rp_filter=0
+		ip netns exec "${!netns}" sysctl -q net.ipv4.conf.default.rp_filter=0
+	done
+}
+
+mptcp_lib_ns_exit() {
+	local netns
+	for netns in "${@}"; do
+		ip netns del "${netns}"
+		rm -f /tmp/"${netns}".{nstat,out}
+	done
+}
+
+mptcp_lib_events() {
+	local ns="${1}"
+	local evts="${2}"
+	declare -n pid="${3}"
+
+	:>"${evts}"
+
+	mptcp_lib_kill_wait "${pid:-0}"
+	ip netns exec "${ns}" ./pm_nl_ctl events >> "${evts}" 2>&1 &
+	pid=$!
+}
+
+mptcp_lib_print_title() {
+	: "${MPTCP_LIB_TEST_COUNTER:?}"
+	: "${MPTCP_LIB_TEST_FORMAT:?}"
+
+	# shellcheck disable=SC2059 # the format is in a variable
+	printf "${MPTCP_LIB_TEST_FORMAT}" "$((++MPTCP_LIB_TEST_COUNTER))" "${*}"
+}
+
+# $1: var name ; $2: prev ret
+mptcp_lib_check_expected_one() {
+	local var="${1}"
+	local exp="e_${var}"
+	local prev_ret="${2}"
+
+	if [ "${!var}" = "${!exp}" ]; then
+		return 0
+	fi
+
+	if [ "${prev_ret}" = "0" ]; then
+		mptcp_lib_pr_fail
+	fi
+
+	mptcp_lib_print_err "Expected value for '${var}': '${!exp}', got '${!var}'."
+	return 1
+}
+
+# $@: all var names to check
+mptcp_lib_check_expected() {
+	local rc=0
+	local var
+
+	for var in "${@}"; do
+		mptcp_lib_check_expected_one "${var}" "${rc}" || rc=1
+	done
+
+	return "${rc}"
+}
+
+# shellcheck disable=SC2034 # Some variables are used below but indirectly
+mptcp_lib_verify_listener_events() {
+	local evt=${1}
+	local e_type=${2}
+	local e_family=${3}
+	local e_saddr=${4}
+	local e_sport=${5}
+	local type
+	local family
+	local saddr
+	local sport
+	local rc=0
+
+	type=$(mptcp_lib_evts_get_info type "${evt}" "${e_type}")
+	family=$(mptcp_lib_evts_get_info family "${evt}" "${e_type}")
+	if [ "${family}" ] && [ "${family}" = "${AF_INET6}" ]; then
+		saddr=$(mptcp_lib_evts_get_info saddr6 "${evt}" "${e_type}")
+	else
+		saddr=$(mptcp_lib_evts_get_info saddr4 "${evt}" "${e_type}")
+	fi
+	sport=$(mptcp_lib_evts_get_info sport "${evt}" "${e_type}")
+
+	mptcp_lib_check_expected "type" "family" "saddr" "sport" || rc="${?}"
+	return "${rc}"
+}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
new file mode 100644
index 000000000000..926b0be87c99
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
@@ -0,0 +1,866 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <string.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include <netdb.h>
+#include <netinet/in.h>
+
+#include <linux/tcp.h>
+
+static int pf = AF_INET;
+
+#ifndef IPPROTO_MPTCP
+#define IPPROTO_MPTCP 262
+#endif
+#ifndef SOL_MPTCP
+#define SOL_MPTCP 284
+#endif
+
+#ifndef MPTCP_INFO
+struct mptcp_info {
+	__u8	mptcpi_subflows;
+	__u8	mptcpi_add_addr_signal;
+	__u8	mptcpi_add_addr_accepted;
+	__u8	mptcpi_subflows_max;
+	__u8	mptcpi_add_addr_signal_max;
+	__u8	mptcpi_add_addr_accepted_max;
+	__u32	mptcpi_flags;
+	__u32	mptcpi_token;
+	__u64	mptcpi_write_seq;
+	__u64	mptcpi_snd_una;
+	__u64	mptcpi_rcv_nxt;
+	__u8	mptcpi_local_addr_used;
+	__u8	mptcpi_local_addr_max;
+	__u8	mptcpi_csum_enabled;
+	__u32	mptcpi_retransmits;
+	__u64	mptcpi_bytes_retrans;
+	__u64	mptcpi_bytes_sent;
+	__u64	mptcpi_bytes_received;
+	__u64	mptcpi_bytes_acked;
+};
+
+struct mptcp_subflow_data {
+	__u32		size_subflow_data;		/* size of this structure in userspace */
+	__u32		num_subflows;			/* must be 0, set by kernel */
+	__u32		size_kernel;			/* must be 0, set by kernel */
+	__u32		size_user;			/* size of one element in data[] */
+} __attribute__((aligned(8)));
+
+struct mptcp_subflow_addrs {
+	union {
+		__kernel_sa_family_t sa_family;
+		struct sockaddr sa_local;
+		struct sockaddr_in sin_local;
+		struct sockaddr_in6 sin6_local;
+		struct __kernel_sockaddr_storage ss_local;
+	};
+	union {
+		struct sockaddr sa_remote;
+		struct sockaddr_in sin_remote;
+		struct sockaddr_in6 sin6_remote;
+		struct __kernel_sockaddr_storage ss_remote;
+	};
+};
+
+#define MPTCP_INFO		1
+#define MPTCP_TCPINFO		2
+#define MPTCP_SUBFLOW_ADDRS	3
+#endif
+
+#ifndef MPTCP_FULL_INFO
+struct mptcp_subflow_info {
+	__u32				id;
+	struct mptcp_subflow_addrs	addrs;
+};
+
+struct mptcp_full_info {
+	__u32		size_tcpinfo_kernel;	/* must be 0, set by kernel */
+	__u32		size_tcpinfo_user;
+	__u32		size_sfinfo_kernel;	/* must be 0, set by kernel */
+	__u32		size_sfinfo_user;
+	__u32		num_subflows;		/* must be 0, set by kernel (real subflow count) */
+	__u32		size_arrays_user;	/* max subflows that userspace is interested in;
+						 * the buffers at subflow_info/tcp_info
+						 * are respectively at least:
+						 *  size_arrays * size_sfinfo_user
+						 *  size_arrays * size_tcpinfo_user
+						 * bytes wide
+						 */
+	__aligned_u64		subflow_info;
+	__aligned_u64		tcp_info;
+	struct mptcp_info	mptcp_info;
+};
+
+#define MPTCP_FULL_INFO		4
+#endif
+
+struct so_state {
+	struct mptcp_info mi;
+	struct mptcp_info last_sample;
+	struct tcp_info tcp_info;
+	struct mptcp_subflow_addrs addrs;
+	uint64_t mptcpi_rcv_delta;
+	uint64_t tcpi_rcv_delta;
+	bool pkt_stats_avail;
+};
+
+#ifndef MIN
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+#endif
+
+static void die_perror(const char *msg)
+{
+	perror(msg);
+	exit(1);
+}
+
+static void die_usage(int r)
+{
+	fprintf(stderr, "Usage: mptcp_sockopt [-6]\n");
+	exit(r);
+}
+
+static void xerror(const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+	fputc('\n', stderr);
+	exit(1);
+}
+
+static const char *getxinfo_strerr(int err)
+{
+	if (err == EAI_SYSTEM)
+		return strerror(errno);
+
+	return gai_strerror(err);
+}
+
+static void xgetaddrinfo(const char *node, const char *service,
+			 const struct addrinfo *hints,
+			 struct addrinfo **res)
+{
+	int err = getaddrinfo(node, service, hints, res);
+
+	if (err) {
+		const char *errstr = getxinfo_strerr(err);
+
+		fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n",
+			node ? node : "", service ? service : "", errstr);
+		exit(1);
+	}
+}
+
+static int sock_listen_mptcp(const char * const listenaddr,
+			     const char * const port)
+{
+	int sock = -1;
+	struct addrinfo hints = {
+		.ai_protocol = IPPROTO_TCP,
+		.ai_socktype = SOCK_STREAM,
+		.ai_flags = AI_PASSIVE | AI_NUMERICHOST
+	};
+
+	hints.ai_family = pf;
+
+	struct addrinfo *a, *addr;
+	int one = 1;
+
+	xgetaddrinfo(listenaddr, port, &hints, &addr);
+	hints.ai_family = pf;
+
+	for (a = addr; a; a = a->ai_next) {
+		sock = socket(a->ai_family, a->ai_socktype, IPPROTO_MPTCP);
+		if (sock < 0)
+			continue;
+
+		if (-1 == setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &one,
+				     sizeof(one)))
+			perror("setsockopt");
+
+		if (bind(sock, a->ai_addr, a->ai_addrlen) == 0)
+			break; /* success */
+
+		perror("bind");
+		close(sock);
+		sock = -1;
+	}
+
+	freeaddrinfo(addr);
+
+	if (sock < 0)
+		xerror("could not create listen socket");
+
+	if (listen(sock, 20))
+		die_perror("listen");
+
+	return sock;
+}
+
+static int sock_connect_mptcp(const char * const remoteaddr,
+			      const char * const port, int proto)
+{
+	struct addrinfo hints = {
+		.ai_protocol = IPPROTO_TCP,
+		.ai_socktype = SOCK_STREAM,
+	};
+	struct addrinfo *a, *addr;
+	int sock = -1;
+
+	hints.ai_family = pf;
+
+	xgetaddrinfo(remoteaddr, port, &hints, &addr);
+	for (a = addr; a; a = a->ai_next) {
+		sock = socket(a->ai_family, a->ai_socktype, proto);
+		if (sock < 0)
+			continue;
+
+		if (connect(sock, a->ai_addr, a->ai_addrlen) == 0)
+			break; /* success */
+
+		die_perror("connect");
+	}
+
+	if (sock < 0)
+		xerror("could not create connect socket");
+
+	freeaddrinfo(addr);
+	return sock;
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	int c;
+
+	while ((c = getopt(argc, argv, "h6")) != -1) {
+		switch (c) {
+		case 'h':
+			die_usage(0);
+			break;
+		case '6':
+			pf = AF_INET6;
+			break;
+		default:
+			die_usage(1);
+			break;
+		}
+	}
+}
+
+static void do_getsockopt_bogus_sf_data(int fd, int optname)
+{
+	struct mptcp_subflow_data good_data;
+	struct bogus_data {
+		struct mptcp_subflow_data d;
+		char buf[2];
+	} bd;
+	socklen_t olen, _olen;
+	int ret;
+
+	memset(&bd, 0, sizeof(bd));
+	memset(&good_data, 0, sizeof(good_data));
+
+	olen = sizeof(good_data);
+	good_data.size_subflow_data = olen;
+
+	ret = getsockopt(fd, SOL_MPTCP, optname, &bd, &olen);
+	assert(ret < 0); /* 0 size_subflow_data */
+	assert(olen == sizeof(good_data));
+
+	bd.d = good_data;
+
+	ret = getsockopt(fd, SOL_MPTCP, optname, &bd, &olen);
+	assert(ret == 0);
+	assert(olen == sizeof(good_data));
+	assert(bd.d.num_subflows == 1);
+	assert(bd.d.size_kernel > 0);
+	assert(bd.d.size_user == 0);
+
+	bd.d = good_data;
+	_olen = rand() % olen;
+	olen = _olen;
+	ret = getsockopt(fd, SOL_MPTCP, optname, &bd, &olen);
+	assert(ret < 0);	/* bogus olen */
+	assert(olen == _olen);	/* must be unchanged */
+
+	bd.d = good_data;
+	olen = sizeof(good_data);
+	bd.d.size_kernel = 1;
+	ret = getsockopt(fd, SOL_MPTCP, optname, &bd, &olen);
+	assert(ret < 0); /* size_kernel not 0 */
+
+	bd.d = good_data;
+	olen = sizeof(good_data);
+	bd.d.num_subflows = 1;
+	ret = getsockopt(fd, SOL_MPTCP, optname, &bd, &olen);
+	assert(ret < 0); /* num_subflows not 0 */
+
+	/* forward compat check: larger struct mptcp_subflow_data on 'old' kernel */
+	bd.d = good_data;
+	olen = sizeof(bd);
+	bd.d.size_subflow_data = sizeof(bd);
+
+	ret = getsockopt(fd, SOL_MPTCP, optname, &bd, &olen);
+	assert(ret == 0);
+
+	/* olen must be truncated to real data size filled by kernel: */
+	assert(olen == sizeof(good_data));
+
+	assert(bd.d.size_subflow_data == sizeof(bd));
+
+	bd.d = good_data;
+	bd.d.size_subflow_data += 1;
+	bd.d.size_user = 1;
+	olen = bd.d.size_subflow_data + 1;
+	_olen = olen;
+
+	ret = getsockopt(fd, SOL_MPTCP, optname, &bd, &_olen);
+	assert(ret == 0);
+
+	/* no truncation, kernel should have filled 1 byte of optname payload in buf[1]: */
+	assert(olen == _olen);
+
+	assert(bd.d.size_subflow_data == sizeof(good_data) + 1);
+	assert(bd.buf[0] == 0);
+}
+
+static void do_getsockopt_mptcp_info(struct so_state *s, int fd, size_t w)
+{
+	struct mptcp_info i;
+	socklen_t olen;
+	int ret;
+
+	olen = sizeof(i);
+	ret = getsockopt(fd, SOL_MPTCP, MPTCP_INFO, &i, &olen);
+
+	if (ret < 0)
+		die_perror("getsockopt MPTCP_INFO");
+
+	s->pkt_stats_avail = olen >= sizeof(i);
+
+	s->last_sample = i;
+	if (s->mi.mptcpi_write_seq == 0)
+		s->mi = i;
+
+	assert(s->mi.mptcpi_write_seq + w == i.mptcpi_write_seq);
+
+	s->mptcpi_rcv_delta = i.mptcpi_rcv_nxt - s->mi.mptcpi_rcv_nxt;
+}
+
+static void do_getsockopt_tcp_info(struct so_state *s, int fd, size_t r, size_t w)
+{
+	struct my_tcp_info {
+		struct mptcp_subflow_data d;
+		struct tcp_info ti[2];
+	} ti;
+	int ret, tries = 5;
+	socklen_t olen;
+
+	do {
+		memset(&ti, 0, sizeof(ti));
+
+		ti.d.size_subflow_data = sizeof(struct mptcp_subflow_data);
+		ti.d.size_user = sizeof(struct tcp_info);
+		olen = sizeof(ti);
+
+		ret = getsockopt(fd, SOL_MPTCP, MPTCP_TCPINFO, &ti, &olen);
+		if (ret < 0)
+			xerror("getsockopt MPTCP_TCPINFO (tries %d, %m)");
+
+		assert(olen <= sizeof(ti));
+		assert(ti.d.size_kernel > 0);
+		assert(ti.d.size_user ==
+		       MIN(ti.d.size_kernel, sizeof(struct tcp_info)));
+		assert(ti.d.num_subflows == 1);
+
+		assert(olen > (socklen_t)sizeof(struct mptcp_subflow_data));
+		olen -= sizeof(struct mptcp_subflow_data);
+		assert(olen == ti.d.size_user);
+
+		s->tcp_info = ti.ti[0];
+
+		if (ti.ti[0].tcpi_bytes_sent == w &&
+		    ti.ti[0].tcpi_bytes_received == r)
+			goto done;
+
+		if (r == 0 && ti.ti[0].tcpi_bytes_sent == w &&
+		    ti.ti[0].tcpi_bytes_received) {
+			s->tcpi_rcv_delta = ti.ti[0].tcpi_bytes_received;
+			goto done;
+		}
+
+		/* wait and repeat, might be that tx is still ongoing */
+		sleep(1);
+	} while (tries-- > 0);
+
+	xerror("tcpi_bytes_sent %" PRIu64 ", want %zu. tcpi_bytes_received %" PRIu64 ", want %zu",
+		ti.ti[0].tcpi_bytes_sent, w, ti.ti[0].tcpi_bytes_received, r);
+
+done:
+	do_getsockopt_bogus_sf_data(fd, MPTCP_TCPINFO);
+}
+
+static void do_getsockopt_subflow_addrs(struct so_state *s, int fd)
+{
+	struct sockaddr_storage remote, local;
+	socklen_t olen, rlen, llen;
+	int ret;
+	struct my_addrs {
+		struct mptcp_subflow_data d;
+		struct mptcp_subflow_addrs addr[2];
+	} addrs;
+
+	memset(&addrs, 0, sizeof(addrs));
+	memset(&local, 0, sizeof(local));
+	memset(&remote, 0, sizeof(remote));
+
+	addrs.d.size_subflow_data = sizeof(struct mptcp_subflow_data);
+	addrs.d.size_user = sizeof(struct mptcp_subflow_addrs);
+	olen = sizeof(addrs);
+
+	ret = getsockopt(fd, SOL_MPTCP, MPTCP_SUBFLOW_ADDRS, &addrs, &olen);
+	if (ret < 0)
+		die_perror("getsockopt MPTCP_SUBFLOW_ADDRS");
+
+	assert(olen <= sizeof(addrs));
+	assert(addrs.d.size_kernel > 0);
+	assert(addrs.d.size_user ==
+	       MIN(addrs.d.size_kernel, sizeof(struct mptcp_subflow_addrs)));
+	assert(addrs.d.num_subflows == 1);
+
+	assert(olen > (socklen_t)sizeof(struct mptcp_subflow_data));
+	olen -= sizeof(struct mptcp_subflow_data);
+	assert(olen == addrs.d.size_user);
+
+	llen = sizeof(local);
+	ret = getsockname(fd, (struct sockaddr *)&local, &llen);
+	if (ret < 0)
+		die_perror("getsockname");
+	rlen = sizeof(remote);
+	ret = getpeername(fd, (struct sockaddr *)&remote, &rlen);
+	if (ret < 0)
+		die_perror("getpeername");
+
+	assert(rlen > 0);
+	assert(rlen == llen);
+
+	assert(remote.ss_family == local.ss_family);
+
+	assert(memcmp(&local, &addrs.addr[0].ss_local, sizeof(local)) == 0);
+	assert(memcmp(&remote, &addrs.addr[0].ss_remote, sizeof(remote)) == 0);
+	s->addrs = addrs.addr[0];
+
+	memset(&addrs, 0, sizeof(addrs));
+
+	addrs.d.size_subflow_data = sizeof(struct mptcp_subflow_data);
+	addrs.d.size_user = sizeof(sa_family_t);
+	olen = sizeof(addrs.d) + sizeof(sa_family_t);
+
+	ret = getsockopt(fd, SOL_MPTCP, MPTCP_SUBFLOW_ADDRS, &addrs, &olen);
+	assert(ret == 0);
+	assert(olen == sizeof(addrs.d) + sizeof(sa_family_t));
+
+	assert(addrs.addr[0].sa_family == pf);
+	assert(addrs.addr[0].sa_family == local.ss_family);
+
+	assert(memcmp(&local, &addrs.addr[0].ss_local, sizeof(local)) != 0);
+	assert(memcmp(&remote, &addrs.addr[0].ss_remote, sizeof(remote)) != 0);
+
+	do_getsockopt_bogus_sf_data(fd, MPTCP_SUBFLOW_ADDRS);
+}
+
+static void do_getsockopt_mptcp_full_info(struct so_state *s, int fd)
+{
+	size_t data_size = sizeof(struct mptcp_full_info);
+	struct mptcp_subflow_info sfinfo[2];
+	struct tcp_info tcp_info[2];
+	struct mptcp_full_info mfi;
+	socklen_t olen;
+	int ret;
+
+	memset(&mfi, 0, data_size);
+	memset(tcp_info, 0, sizeof(tcp_info));
+	memset(sfinfo, 0, sizeof(sfinfo));
+
+	mfi.size_tcpinfo_user = sizeof(struct tcp_info);
+	mfi.size_sfinfo_user = sizeof(struct mptcp_subflow_info);
+	mfi.size_arrays_user = 2;
+	mfi.subflow_info = (unsigned long)&sfinfo[0];
+	mfi.tcp_info = (unsigned long)&tcp_info[0];
+	olen = data_size;
+
+	ret = getsockopt(fd, SOL_MPTCP, MPTCP_FULL_INFO, &mfi, &olen);
+	if (ret < 0) {
+		if (errno == EOPNOTSUPP) {
+			perror("MPTCP_FULL_INFO test skipped");
+			return;
+		}
+		xerror("getsockopt MPTCP_FULL_INFO");
+	}
+
+	assert(olen <= data_size);
+	assert(mfi.size_tcpinfo_kernel > 0);
+	assert(mfi.size_tcpinfo_user ==
+	       MIN(mfi.size_tcpinfo_kernel, sizeof(struct tcp_info)));
+	assert(mfi.size_sfinfo_kernel > 0);
+	assert(mfi.size_sfinfo_user ==
+	       MIN(mfi.size_sfinfo_kernel, sizeof(struct mptcp_subflow_info)));
+	assert(mfi.num_subflows == 1);
+
+	/* Tolerate future extension to mptcp_info struct and running newer
+	 * test on top of older kernel.
+	 * Anyway any kernel supporting MPTCP_FULL_INFO must at least include
+	 * the following in mptcp_info.
+	 */
+	assert(olen > (socklen_t)__builtin_offsetof(struct mptcp_full_info, tcp_info));
+	assert(mfi.mptcp_info.mptcpi_subflows == 0);
+	assert(mfi.mptcp_info.mptcpi_bytes_sent == s->last_sample.mptcpi_bytes_sent);
+	assert(mfi.mptcp_info.mptcpi_bytes_received == s->last_sample.mptcpi_bytes_received);
+
+	assert(sfinfo[0].id == 1);
+	assert(tcp_info[0].tcpi_bytes_sent == s->tcp_info.tcpi_bytes_sent);
+	assert(tcp_info[0].tcpi_bytes_received == s->tcp_info.tcpi_bytes_received);
+	assert(!memcmp(&sfinfo->addrs, &s->addrs, sizeof(struct mptcp_subflow_addrs)));
+}
+
+static void do_getsockopts(struct so_state *s, int fd, size_t r, size_t w)
+{
+	do_getsockopt_mptcp_info(s, fd, w);
+
+	do_getsockopt_tcp_info(s, fd, r, w);
+
+	do_getsockopt_subflow_addrs(s, fd);
+
+	if (r)
+		do_getsockopt_mptcp_full_info(s, fd);
+}
+
+static void connect_one_server(int fd, int pipefd)
+{
+	char buf[4096], buf2[4096];
+	size_t len, i, total;
+	struct so_state s;
+	bool eof = false;
+	ssize_t ret;
+
+	memset(&s, 0, sizeof(s));
+
+	len = rand() % (sizeof(buf) - 1);
+
+	if (len < 128)
+		len = 128;
+
+	for (i = 0; i < len ; i++) {
+		buf[i] = rand() % 26;
+		buf[i] += 'A';
+	}
+
+	buf[i] = '\n';
+
+	do_getsockopts(&s, fd, 0, 0);
+
+	/* un-block server */
+	ret = read(pipefd, buf2, 4);
+	assert(ret == 4);
+	close(pipefd);
+
+	assert(strncmp(buf2, "xmit", 4) == 0);
+
+	ret = write(fd, buf, len);
+	if (ret < 0)
+		die_perror("write");
+
+	if (ret != (ssize_t)len)
+		xerror("short write");
+
+	total = 0;
+	do {
+		ret = read(fd, buf2 + total, sizeof(buf2) - total);
+		if (ret < 0)
+			die_perror("read");
+		if (ret == 0) {
+			eof = true;
+			break;
+		}
+
+		total += ret;
+	} while (total < len);
+
+	if (total != len)
+		xerror("total %lu, len %lu eof %d\n", total, len, eof);
+
+	if (memcmp(buf, buf2, len))
+		xerror("data corruption");
+
+	if (s.tcpi_rcv_delta)
+		assert(s.tcpi_rcv_delta <= total);
+
+	do_getsockopts(&s, fd, ret, ret);
+
+	if (eof)
+		total += 1; /* sequence advances due to FIN */
+
+	assert(s.mptcpi_rcv_delta == (uint64_t)total);
+	close(fd);
+}
+
+static void process_one_client(int fd, int pipefd)
+{
+	ssize_t ret, ret2, ret3;
+	struct so_state s;
+	char buf[4096];
+
+	memset(&s, 0, sizeof(s));
+	do_getsockopts(&s, fd, 0, 0);
+
+	ret = write(pipefd, "xmit", 4);
+	assert(ret == 4);
+
+	ret = read(fd, buf, sizeof(buf));
+	if (ret < 0)
+		die_perror("read");
+
+	assert(s.mptcpi_rcv_delta <= (uint64_t)ret);
+
+	if (s.tcpi_rcv_delta)
+		assert(s.tcpi_rcv_delta == (uint64_t)ret);
+
+	ret2 = write(fd, buf, ret);
+	if (ret2 < 0)
+		die_perror("write");
+
+	/* wait for hangup */
+	ret3 = read(fd, buf, 1);
+	if (ret3 != 0)
+		xerror("expected EOF, got %lu", ret3);
+
+	do_getsockopts(&s, fd, ret, ret2);
+	if (s.mptcpi_rcv_delta != (uint64_t)ret + 1)
+		xerror("mptcpi_rcv_delta %" PRIu64 ", expect %" PRIu64, s.mptcpi_rcv_delta, ret + 1, s.mptcpi_rcv_delta - ret);
+
+	/* be nice when running on top of older kernel */
+	if (s.pkt_stats_avail) {
+		if (s.last_sample.mptcpi_bytes_sent != ret2)
+			xerror("mptcpi_bytes_sent %" PRIu64 ", expect %" PRIu64,
+			       s.last_sample.mptcpi_bytes_sent, ret2,
+			       s.last_sample.mptcpi_bytes_sent - ret2);
+		if (s.last_sample.mptcpi_bytes_received != ret)
+			xerror("mptcpi_bytes_received %" PRIu64 ", expect %" PRIu64,
+			       s.last_sample.mptcpi_bytes_received, ret,
+			       s.last_sample.mptcpi_bytes_received - ret);
+		if (s.last_sample.mptcpi_bytes_acked != ret)
+			xerror("mptcpi_bytes_acked %" PRIu64 ", expect %" PRIu64,
+			       s.last_sample.mptcpi_bytes_acked, ret2,
+			       s.last_sample.mptcpi_bytes_acked - ret2);
+	}
+
+	close(fd);
+}
+
+static int xaccept(int s)
+{
+	int fd = accept(s, NULL, 0);
+
+	if (fd < 0)
+		die_perror("accept");
+
+	return fd;
+}
+
+static int server(int pipefd)
+{
+	int fd = -1, r;
+
+	switch (pf) {
+	case AF_INET:
+		fd = sock_listen_mptcp("127.0.0.1", "15432");
+		break;
+	case AF_INET6:
+		fd = sock_listen_mptcp("::1", "15432");
+		break;
+	default:
+		xerror("Unknown pf %d\n", pf);
+		break;
+	}
+
+	r = write(pipefd, "conn", 4);
+	assert(r == 4);
+
+	alarm(15);
+	r = xaccept(fd);
+
+	process_one_client(r, pipefd);
+
+	return 0;
+}
+
+static void test_ip_tos_sockopt(int fd)
+{
+	uint8_t tos_in, tos_out;
+	socklen_t s;
+	int r;
+
+	tos_in = rand() & 0xfc;
+	r = setsockopt(fd, SOL_IP, IP_TOS, &tos_in, sizeof(tos_out));
+	if (r != 0)
+		die_perror("setsockopt IP_TOS");
+
+	tos_out = 0;
+	s = sizeof(tos_out);
+	r = getsockopt(fd, SOL_IP, IP_TOS, &tos_out, &s);
+	if (r != 0)
+		die_perror("getsockopt IP_TOS");
+
+	if (tos_in != tos_out)
+		xerror("tos %x != %x socklen_t %d\n", tos_in, tos_out, s);
+
+	if (s != 1)
+		xerror("tos should be 1 byte");
+
+	s = 0;
+	r = getsockopt(fd, SOL_IP, IP_TOS, &tos_out, &s);
+	if (r != 0)
+		die_perror("getsockopt IP_TOS 0");
+	if (s != 0)
+		xerror("expect socklen_t == 0");
+
+	s = -1;
+	r = getsockopt(fd, SOL_IP, IP_TOS, &tos_out, &s);
+	if (r != -1 && errno != EINVAL)
+		die_perror("getsockopt IP_TOS did not indicate -EINVAL");
+	if (s != -1)
+		xerror("expect socklen_t == -1");
+}
+
+static int client(int pipefd)
+{
+	int fd = -1;
+
+	alarm(15);
+
+	switch (pf) {
+	case AF_INET:
+		fd = sock_connect_mptcp("127.0.0.1", "15432", IPPROTO_MPTCP);
+		break;
+	case AF_INET6:
+		fd = sock_connect_mptcp("::1", "15432", IPPROTO_MPTCP);
+		break;
+	default:
+		xerror("Unknown pf %d\n", pf);
+	}
+
+	test_ip_tos_sockopt(fd);
+
+	connect_one_server(fd, pipefd);
+
+	return 0;
+}
+
+static pid_t xfork(void)
+{
+	pid_t p = fork();
+
+	if (p < 0)
+		die_perror("fork");
+
+	return p;
+}
+
+static int rcheck(int wstatus, const char *what)
+{
+	if (WIFEXITED(wstatus)) {
+		if (WEXITSTATUS(wstatus) == 0)
+			return 0;
+		fprintf(stderr, "%s exited, status=%d\n", what, WEXITSTATUS(wstatus));
+		return WEXITSTATUS(wstatus);
+	} else if (WIFSIGNALED(wstatus)) {
+		xerror("%s killed by signal %d\n", what, WTERMSIG(wstatus));
+	} else if (WIFSTOPPED(wstatus)) {
+		xerror("%s stopped by signal %d\n", what, WSTOPSIG(wstatus));
+	}
+
+	return 111;
+}
+
+static void init_rng(void)
+{
+	int fd = open("/dev/urandom", O_RDONLY);
+
+	if (fd >= 0) {
+		unsigned int foo;
+		ssize_t ret;
+
+		/* can't fail */
+		ret = read(fd, &foo, sizeof(foo));
+		assert(ret == sizeof(foo));
+
+		close(fd);
+		srand(foo);
+	} else {
+		srand(time(NULL));
+	}
+}
+
+int main(int argc, char *argv[])
+{
+	int e1, e2, wstatus;
+	pid_t s, c, ret;
+	int pipefds[2];
+
+	parse_opts(argc, argv);
+
+	init_rng();
+
+	e1 = pipe(pipefds);
+	if (e1 < 0)
+		die_perror("pipe");
+
+	s = xfork();
+	if (s == 0)
+		return server(pipefds[1]);
+
+	close(pipefds[1]);
+
+	/* wait until server bound a socket */
+	e1 = read(pipefds[0], &e1, 4);
+	assert(e1 == 4);
+
+	c = xfork();
+	if (c == 0)
+		return client(pipefds[0]);
+
+	close(pipefds[0]);
+
+	ret = waitpid(s, &wstatus, 0);
+	if (ret == -1)
+		die_perror("waitpid");
+	e1 = rcheck(wstatus, "server");
+	ret = waitpid(c, &wstatus, 0);
+	if (ret == -1)
+		die_perror("waitpid");
+	e2 = rcheck(wstatus, "client");
+
+	return e1 ? e1 : e2;
+}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
new file mode 100755
index 000000000000..e2d70c18786e
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
@@ -0,0 +1,338 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Double quotes to prevent globbing and word splitting is recommended in new
+# code but we accept it, especially because there were too many before having
+# address all other issues detected by shellcheck.
+#shellcheck disable=SC2086
+
+. "$(dirname "${0}")/mptcp_lib.sh"
+
+ret=0
+sin=""
+sout=""
+cin=""
+cout=""
+timeout_poll=30
+timeout_test=$((timeout_poll * 2 + 1))
+iptables="iptables"
+ip6tables="ip6tables"
+
+ns1=""
+ns2=""
+ns_sbox=""
+
+add_mark_rules()
+{
+	local ns=$1
+	local m=$2
+
+	local t
+	for t in ${iptables} ${ip6tables}; do
+		# just to debug: check we have multiple subflows connection requests
+		ip netns exec $ns $t -A OUTPUT -p tcp --syn -m mark --mark $m -j ACCEPT
+
+		# RST packets might be handled by a internal dummy socket
+		ip netns exec $ns $t -A OUTPUT -p tcp --tcp-flags RST RST -m mark --mark 0 -j ACCEPT
+
+		ip netns exec $ns $t -A OUTPUT -p tcp -m mark --mark $m -j ACCEPT
+		ip netns exec $ns $t -A OUTPUT -p tcp -m mark --mark 0 -j DROP
+	done
+}
+
+init()
+{
+	mptcp_lib_ns_init ns1 ns2 ns_sbox
+
+	local i
+	for i in $(seq 1 4); do
+		ip link add ns1eth$i netns "$ns1" type veth peer name ns2eth$i netns "$ns2"
+		ip -net "$ns1" addr add 10.0.$i.1/24 dev ns1eth$i
+		ip -net "$ns1" addr add dead:beef:$i::1/64 dev ns1eth$i nodad
+		ip -net "$ns1" link set ns1eth$i up
+
+		ip -net "$ns2" addr add 10.0.$i.2/24 dev ns2eth$i
+		ip -net "$ns2" addr add dead:beef:$i::2/64 dev ns2eth$i nodad
+		ip -net "$ns2" link set ns2eth$i up
+
+		# let $ns2 reach any $ns1 address from any interface
+		ip -net "$ns2" route add default via 10.0.$i.1 dev ns2eth$i metric 10$i
+
+		ip netns exec $ns1 ./pm_nl_ctl add 10.0.$i.1 flags signal
+		ip netns exec $ns1 ./pm_nl_ctl add dead:beef:$i::1 flags signal
+
+		ip netns exec $ns2 ./pm_nl_ctl add 10.0.$i.2 flags signal
+		ip netns exec $ns2 ./pm_nl_ctl add dead:beef:$i::2 flags signal
+	done
+
+	ip netns exec $ns1 ./pm_nl_ctl limits 8 8
+	ip netns exec $ns2 ./pm_nl_ctl limits 8 8
+
+	add_mark_rules $ns1 1
+	add_mark_rules $ns2 2
+}
+
+# This function is used in the cleanup trap
+#shellcheck disable=SC2317
+cleanup()
+{
+	mptcp_lib_ns_exit "${ns1}" "${ns2}" "${ns_sbox}"
+	rm -f "$cin" "$cout"
+	rm -f "$sin" "$sout"
+}
+
+mptcp_lib_check_mptcp
+mptcp_lib_check_kallsyms
+mptcp_lib_check_tools ip "${iptables}" "${ip6tables}"
+
+check_mark()
+{
+	local ns=$1
+	local af=$2
+
+	local tables=${iptables}
+
+	if [ $af -eq 6 ];then
+		tables=${ip6tables}
+	fi
+
+	local counters values
+	counters=$(ip netns exec $ns $tables -v -L OUTPUT | grep DROP)
+	values=${counters%DROP*}
+
+	local v
+	for v in $values; do
+		if [ $v -ne 0 ]; then
+			mptcp_lib_pr_fail "got $tables $values in ns $ns," \
+					  "not 0 - not all expected packets marked"
+			ret=${KSFT_FAIL}
+			return 1
+		fi
+	done
+
+	return 0
+}
+
+print_title()
+{
+	mptcp_lib_print_title "${@}"
+}
+
+do_transfer()
+{
+	local listener_ns="$1"
+	local connector_ns="$2"
+	local cl_proto="$3"
+	local srv_proto="$4"
+	local connect_addr="$5"
+
+	local port=12001
+
+	:> "$cout"
+	:> "$sout"
+
+	local mptcp_connect="./mptcp_connect -r 20"
+
+	local local_addr ip
+	if mptcp_lib_is_v6 "${connect_addr}"; then
+		local_addr="::"
+		ip=ipv6
+	else
+		local_addr="0.0.0.0"
+		ip=ipv4
+	fi
+
+	cmsg="TIMESTAMPNS"
+	if mptcp_lib_kallsyms_has "mptcp_ioctl$"; then
+		cmsg+=",TCPINQ"
+	fi
+
+	timeout ${timeout_test} \
+		ip netns exec ${listener_ns} \
+			$mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c "${cmsg}" \
+				${local_addr} < "$sin" > "$sout" &
+	local spid=$!
+
+	sleep 1
+
+	timeout ${timeout_test} \
+		ip netns exec ${connector_ns} \
+			$mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} -c "${cmsg}" \
+				$connect_addr < "$cin" > "$cout" &
+
+	local cpid=$!
+
+	wait $cpid
+	local retc=$?
+	wait $spid
+	local rets=$?
+
+	print_title "Transfer ${ip:2}"
+	if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
+		mptcp_lib_pr_fail "client exit code $retc, server $rets"
+		echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2
+		ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port"
+
+		echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2
+		ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port"
+
+		mptcp_lib_result_fail "transfer ${ip}"
+
+		ret=${KSFT_FAIL}
+		return 1
+	fi
+	if ! mptcp_lib_check_transfer $cin $sout "file received by server"; then
+		rets=1
+	else
+		mptcp_lib_pr_ok
+	fi
+	mptcp_lib_result_code "${rets}" "transfer ${ip}"
+
+	print_title "Mark ${ip:2}"
+	if [ $local_addr = "::" ];then
+		check_mark $listener_ns 6 || retc=1
+		check_mark $connector_ns 6 || retc=1
+	else
+		check_mark $listener_ns 4 || retc=1
+		check_mark $connector_ns 4 || retc=1
+	fi
+
+	mptcp_lib_result_code "${retc}" "mark ${ip}"
+
+	if [ $retc -eq 0 ] && [ $rets -eq 0 ];then
+		mptcp_lib_pr_ok
+		return 0
+	fi
+	mptcp_lib_pr_fail
+
+	return 1
+}
+
+make_file()
+{
+	local name=$1
+	local who=$2
+	local size=$3
+
+	mptcp_lib_make_file $name 1024 $size
+
+	echo "Created $name (size $size KB) containing data sent by $who"
+}
+
+do_mptcp_sockopt_tests()
+{
+	local lret=0
+
+	if ! mptcp_lib_kallsyms_has "mptcp_diag_fill_info$"; then
+		mptcp_lib_pr_skip "MPTCP sockopt not supported"
+		mptcp_lib_result_skip "sockopt"
+		return
+	fi
+
+	ip netns exec "$ns_sbox" ./mptcp_sockopt
+	lret=$?
+
+	print_title "SOL_MPTCP sockopt v4"
+	if [ $lret -ne 0 ]; then
+		mptcp_lib_pr_fail
+		mptcp_lib_result_fail "sockopt v4"
+		ret=$lret
+		return
+	fi
+	mptcp_lib_pr_ok
+	mptcp_lib_result_pass "sockopt v4"
+
+	ip netns exec "$ns_sbox" ./mptcp_sockopt -6
+	lret=$?
+
+	print_title "SOL_MPTCP sockopt v6"
+	if [ $lret -ne 0 ]; then
+		mptcp_lib_pr_fail
+		mptcp_lib_result_fail "sockopt v6"
+		ret=$lret
+		return
+	fi
+	mptcp_lib_pr_ok
+	mptcp_lib_result_pass "sockopt v6"
+}
+
+run_tests()
+{
+	local listener_ns="$1"
+	local connector_ns="$2"
+	local connect_addr="$3"
+	local lret=0
+
+	do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr}
+
+	lret=$?
+
+	if [ $lret -ne 0 ]; then
+		ret=$lret
+		return
+	fi
+}
+
+do_tcpinq_test()
+{
+	print_title "TCP_INQ cmsg/ioctl $*"
+	ip netns exec "$ns_sbox" ./mptcp_inq "$@"
+	local lret=$?
+	if [ $lret -ne 0 ];then
+		ret=$lret
+		mptcp_lib_pr_fail
+		mptcp_lib_result_fail "TCP_INQ: $*"
+		return $lret
+	fi
+
+	mptcp_lib_pr_ok
+	mptcp_lib_result_pass "TCP_INQ: $*"
+	return $lret
+}
+
+do_tcpinq_tests()
+{
+	local lret=0
+
+	if ! mptcp_lib_kallsyms_has "mptcp_ioctl$"; then
+		mptcp_lib_pr_skip "TCP_INQ not supported"
+		mptcp_lib_result_skip "TCP_INQ"
+		return
+	fi
+
+	local args
+	for args in "-t tcp" "-r tcp"; do
+		do_tcpinq_test $args
+		lret=$?
+		if [ $lret -ne 0 ] ; then
+			return $lret
+		fi
+		do_tcpinq_test -6 $args
+		lret=$?
+		if [ $lret -ne 0 ] ; then
+			return $lret
+		fi
+	done
+
+	do_tcpinq_test -r tcp -t tcp
+
+	return $?
+}
+
+sin=$(mktemp)
+sout=$(mktemp)
+cin=$(mktemp)
+cout=$(mktemp)
+init
+make_file "$cin" "client" 1
+make_file "$sin" "server" 1
+trap cleanup EXIT
+
+run_tests $ns1 $ns2 10.0.1.1
+run_tests $ns1 $ns2 dead:beef:1::1
+
+do_mptcp_sockopt_tests
+do_tcpinq_tests
+
+mptcp_lib_result_print_all_tap
+exit $ret
diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh
index 15f4f46ca3a9..6ab8c5d36340 100755
--- a/tools/testing/selftests/net/mptcp/pm_netlink.sh
+++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh
@@ -1,76 +1,79 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-ksft_skip=4
+# Double quotes to prevent globbing and word splitting is recommended in new
+# code but we accept it, especially because there were too many before having
+# address all other issues detected by shellcheck.
+#shellcheck disable=SC2086
+
+. "$(dirname "${0}")/mptcp_lib.sh"
+
 ret=0
 
 usage() {
 	echo "Usage: $0 [ -h ]"
 }
 
-
+optstring=h
 while getopts "$optstring" option;do
 	case "$option" in
 	"h")
 		usage $0
-		exit 0
+		exit ${KSFT_PASS}
 		;;
 	"?")
 		usage $0
-		exit 1
+		exit ${KSFT_FAIL}
 		;;
 	esac
 done
 
-sec=$(date +%s)
-rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
-ns1="ns1-$rndh"
+ns1=""
 err=$(mktemp)
-ret=0
 
+# This function is used in the cleanup trap
+#shellcheck disable=SC2317
 cleanup()
 {
 	rm -f $err
-	ip netns del $ns1
+	mptcp_lib_ns_exit "${ns1}"
 }
 
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
-	echo "SKIP: Could not run test without ip tool"
-	exit $ksft_skip
-fi
+mptcp_lib_check_mptcp
+mptcp_lib_check_tools ip
 
 trap cleanup EXIT
 
-ip netns add $ns1 || exit $ksft_skip
-ip -net $ns1 link set lo up
-ip netns exec $ns1 sysctl -q net.mptcp.enabled=1
+mptcp_lib_ns_init ns1
 
 check()
 {
 	local cmd="$1"
 	local expected="$2"
 	local msg="$3"
-	local out=`$cmd 2>$err`
-	local cmd_ret=$?
-
-	printf "%-50s %s" "$msg"
-	if [ $cmd_ret -ne 0 ]; then
-		echo "[FAIL] command execution '$cmd' stderr "
-		cat $err
-		ret=1
-	elif [ "$out" = "$expected" ]; then
-		echo "[ OK ]"
-	else
-		echo -n "[FAIL] "
-		echo "expected '$expected' got '$out'"
-		ret=1
+	local rc=0
+
+	mptcp_lib_print_title "$msg"
+	mptcp_lib_check_output "${err}" "${cmd}" "${expected}" || rc=${?}
+	if [ ${rc} -eq 2 ]; then
+		mptcp_lib_result_fail "${msg} # error ${rc}"
+		ret=${KSFT_FAIL}
+	elif [ ${rc} -eq 0 ]; then
+		mptcp_lib_print_ok "[ OK ]"
+		mptcp_lib_result_pass "${msg}"
+	elif [ ${rc} -eq 1 ]; then
+		mptcp_lib_result_fail "${msg} # different output"
+		ret=${KSFT_FAIL}
 	fi
 }
 
 check "ip netns exec $ns1 ./pm_nl_ctl dump" "" "defaults addr list"
-check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0
-subflows 0" "defaults limits"
+
+default_limits="$(ip netns exec $ns1 ./pm_nl_ctl limits)"
+if mptcp_lib_expect_all_features; then
+	check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0
+subflows 2" "defaults limits"
+fi
 
 ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1
 ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.2 flags subflow dev lo
@@ -88,21 +91,22 @@ check "ip netns exec $ns1 ./pm_nl_ctl dump" \
 "id 1 flags  10.0.1.1
 id 3 flags signal,backup 10.0.1.3" "dump addrs after del"
 
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.3
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.3 2>/dev/null
 check "ip netns exec $ns1 ./pm_nl_ctl get 4" "" "duplicate addr"
 
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.4 id 10 flags signal
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.4 flags signal
 check "ip netns exec $ns1 ./pm_nl_ctl get 4" "id 4 flags signal 10.0.1.4" "id addr increment"
 
-for i in `seq 5 9`; do
+for i in $(seq 5 9); do
 	ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.$i flags signal >/dev/null 2>&1
 done
 check "ip netns exec $ns1 ./pm_nl_ctl get 9" "id 9 flags signal 10.0.1.9" "hard addr limit"
 check "ip netns exec $ns1 ./pm_nl_ctl get 10" "" "above hard addr limit"
 
-for i in `seq 9 256`; do
+ip netns exec $ns1 ./pm_nl_ctl del 9
+for i in $(seq 10 255); do
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.9 id $i
 	ip netns exec $ns1 ./pm_nl_ctl del $i
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.9
 done
 check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags  10.0.1.1
 id 3 flags signal,backup 10.0.1.3
@@ -115,16 +119,81 @@ id 8 flags signal 10.0.1.8" "id limit"
 ip netns exec $ns1 ./pm_nl_ctl flush
 check "ip netns exec $ns1 ./pm_nl_ctl dump" "" "flush addrs"
 
-ip netns exec $ns1 ./pm_nl_ctl limits 9 1
-check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0
-subflows 0" "rcv addrs above hard limit"
+ip netns exec $ns1 ./pm_nl_ctl limits 9 1 2>/dev/null
+check "ip netns exec $ns1 ./pm_nl_ctl limits" "$default_limits" "rcv addrs above hard limit"
 
-ip netns exec $ns1 ./pm_nl_ctl limits 1 9
-check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0
-subflows 0" "subflows above hard limit"
+ip netns exec $ns1 ./pm_nl_ctl limits 1 9 2>/dev/null
+check "ip netns exec $ns1 ./pm_nl_ctl limits" "$default_limits" "subflows above hard limit"
 
 ip netns exec $ns1 ./pm_nl_ctl limits 8 8
 check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 8
 subflows 8" "set limits"
 
+ip netns exec $ns1 ./pm_nl_ctl flush
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.2
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.3 id 100
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.4
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.5 id 254
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.6
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.7
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.8
+check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags  10.0.1.1
+id 2 flags  10.0.1.2
+id 3 flags  10.0.1.7
+id 4 flags  10.0.1.8
+id 100 flags  10.0.1.3
+id 101 flags  10.0.1.4
+id 254 flags  10.0.1.5
+id 255 flags  10.0.1.6" "set ids"
+
+ip netns exec $ns1 ./pm_nl_ctl flush
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.1
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.2 id 254
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.3
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.4
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.5 id 253
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.6
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.7
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.8
+check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags  10.0.0.1
+id 2 flags  10.0.0.4
+id 3 flags  10.0.0.6
+id 4 flags  10.0.0.7
+id 5 flags  10.0.0.8
+id 253 flags  10.0.0.5
+id 254 flags  10.0.0.2
+id 255 flags  10.0.0.3" "wrap-around ids"
+
+ip netns exec $ns1 ./pm_nl_ctl flush
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1 flags subflow
+ip netns exec $ns1 ./pm_nl_ctl set 10.0.1.1 flags backup
+check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
+subflow,backup 10.0.1.1" "set flags (backup)"
+ip netns exec $ns1 ./pm_nl_ctl set 10.0.1.1 flags nobackup
+check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
+subflow 10.0.1.1" "          (nobackup)"
+
+# fullmesh support has been added later
+ip netns exec $ns1 ./pm_nl_ctl set id 1 flags fullmesh 2>/dev/null
+if ip netns exec $ns1 ./pm_nl_ctl dump | grep -q "fullmesh" ||
+   mptcp_lib_expect_all_features; then
+	check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
+subflow,fullmesh 10.0.1.1" "          (fullmesh)"
+	ip netns exec $ns1 ./pm_nl_ctl set id 1 flags nofullmesh
+	check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
+subflow 10.0.1.1" "          (nofullmesh)"
+	ip netns exec $ns1 ./pm_nl_ctl set id 1 flags backup,fullmesh
+	check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
+subflow,backup,fullmesh 10.0.1.1" "          (backup,fullmesh)"
+else
+	for st in fullmesh nofullmesh backup,fullmesh; do
+		st="          (${st})"
+		mptcp_lib_print_title "${st}"
+		mptcp_lib_pr_skip
+		mptcp_lib_result_skip "${st}"
+	done
+fi
+
+mptcp_lib_result_print_all_tap
 exit $ret
diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
index b24a2f17d415..7426a2cbd4a0 100644
--- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
+++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
@@ -6,6 +6,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+#include <limits.h>
 
 #include <sys/socket.h>
 #include <sys/types.h>
@@ -21,16 +22,29 @@
 #ifndef MPTCP_PM_NAME
 #define MPTCP_PM_NAME		"mptcp_pm"
 #endif
+#ifndef MPTCP_PM_EVENTS
+#define MPTCP_PM_EVENTS		"mptcp_pm_events"
+#endif
+#ifndef IPPROTO_MPTCP
+#define IPPROTO_MPTCP 262
+#endif
 
 static void syntax(char *argv[])
 {
-	fprintf(stderr, "%s add|get|del|flush|dump|accept [<args>]\n", argv[0]);
-	fprintf(stderr, "\tadd [flags signal|subflow|backup] [id <nr>] [dev <name>] <ip>\n");
-	fprintf(stderr, "\tdel <id>\n");
+	fprintf(stderr, "%s add|ann|rem|csf|dsf|get|set|del|flush|dump|events|listen|accept [<args>]\n", argv[0]);
+	fprintf(stderr, "\tadd [flags signal|subflow|backup|fullmesh] [id <nr>] [dev <name>] <ip>\n");
+	fprintf(stderr, "\tann <local-ip> id <local-id> token <token> [port <local-port>] [dev <name>]\n");
+	fprintf(stderr, "\trem id <local-id> token <token>\n");
+	fprintf(stderr, "\tcsf lip <local-ip> lid <local-id> rip <remote-ip> rport <remote-port> token <token>\n");
+	fprintf(stderr, "\tdsf lip <local-ip> lport <local-port> rip <remote-ip> rport <remote-port> token <token>\n");
+	fprintf(stderr, "\tdel <id> [<ip>]\n");
 	fprintf(stderr, "\tget <id>\n");
+	fprintf(stderr, "\tset [<ip>] [id <nr>] flags [no]backup|[no]fullmesh [port <nr>] [token <token>] [rip <ip>] [rport <port>]\n");
 	fprintf(stderr, "\tflush\n");
 	fprintf(stderr, "\tdump\n");
 	fprintf(stderr, "\tlimits [<rcv addr max> <subflow max>]\n");
+	fprintf(stderr, "\tevents\n");
+	fprintf(stderr, "\tlisten <local-ip> <local-port>\n");
 	exit(0);
 }
 
@@ -52,20 +66,25 @@ static int init_genl_req(char *data, int family, int cmd, int version)
 	return off;
 }
 
-static void nl_error(struct nlmsghdr *nh)
+static int nl_error(struct nlmsghdr *nh)
 {
 	struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(nh);
 	int len = nh->nlmsg_len - sizeof(*nh);
 	uint32_t off;
 
-	if (len < sizeof(struct nlmsgerr))
+	if (len < sizeof(struct nlmsgerr)) {
 		error(1, 0, "netlink error message truncated %d min %ld", len,
 		      sizeof(struct nlmsgerr));
+		return -1;
+	}
 
-	if (!err->error) {
+	if (err->error) {
 		/* check messages from kernel */
 		struct rtattr *attrs = (struct rtattr *)NLMSG_DATA(nh);
 
+		fprintf(stderr, "netlink error %d (%s)\n",
+			err->error, strerror(-err->error));
+
 		while (RTA_OK(attrs, len)) {
 			if (attrs->rta_type == NLMSGERR_ATTR_MSG)
 				fprintf(stderr, "netlink ext ack msg: %s\n",
@@ -77,12 +96,115 @@ static void nl_error(struct nlmsghdr *nh)
 			}
 			attrs = RTA_NEXT(attrs, len);
 		}
-	} else {
-		fprintf(stderr, "netlink error %d", err->error);
+		return -1;
 	}
+
+	return 0;
 }
 
-/* do a netlink command and, if max > 0, fetch the reply  */
+static int capture_events(int fd, int event_group)
+{
+	u_int8_t buffer[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+			NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1024];
+	struct genlmsghdr *ghdr;
+	struct rtattr *attrs;
+	struct nlmsghdr *nh;
+	int ret = 0;
+	int res_len;
+	int msg_len;
+	fd_set rfds;
+
+	if (setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP,
+		       &event_group, sizeof(event_group)) < 0)
+		error(1, errno, "could not join the " MPTCP_PM_EVENTS " mcast group");
+
+	do {
+		FD_ZERO(&rfds);
+		FD_SET(fd, &rfds);
+		res_len = NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+		  NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1024;
+
+		ret = select(FD_SETSIZE, &rfds, NULL, NULL, NULL);
+
+		if (ret < 0)
+			error(1, ret, "error in select() on NL socket");
+
+		res_len = recv(fd, buffer, res_len, 0);
+		if (res_len < 0)
+			error(1, res_len, "error on recv() from NL socket");
+
+		nh = (struct nlmsghdr *)buffer;
+
+		for (; NLMSG_OK(nh, res_len); nh = NLMSG_NEXT(nh, res_len)) {
+			if (nh->nlmsg_type == NLMSG_ERROR)
+				error(1, NLMSG_ERROR, "received invalid NL message");
+
+			ghdr = (struct genlmsghdr *)NLMSG_DATA(nh);
+
+			if (ghdr->cmd == 0)
+				continue;
+
+			fprintf(stderr, "type:%d", ghdr->cmd);
+
+			msg_len = nh->nlmsg_len - NLMSG_LENGTH(GENL_HDRLEN);
+
+			attrs = (struct rtattr *) ((char *) ghdr + GENL_HDRLEN);
+			while (RTA_OK(attrs, msg_len)) {
+				if (attrs->rta_type == MPTCP_ATTR_TOKEN)
+					fprintf(stderr, ",token:%u", *(__u32 *)RTA_DATA(attrs));
+				else if (attrs->rta_type == MPTCP_ATTR_FAMILY)
+					fprintf(stderr, ",family:%u", *(__u16 *)RTA_DATA(attrs));
+				else if (attrs->rta_type == MPTCP_ATTR_LOC_ID)
+					fprintf(stderr, ",loc_id:%u", *(__u8 *)RTA_DATA(attrs));
+				else if (attrs->rta_type == MPTCP_ATTR_REM_ID)
+					fprintf(stderr, ",rem_id:%u", *(__u8 *)RTA_DATA(attrs));
+				else if (attrs->rta_type == MPTCP_ATTR_SADDR4) {
+					u_int32_t saddr4 = ntohl(*(__u32 *)RTA_DATA(attrs));
+
+					fprintf(stderr, ",saddr4:%u.%u.%u.%u", saddr4 >> 24,
+					       (saddr4 >> 16) & 0xFF, (saddr4 >> 8) & 0xFF,
+					       (saddr4 & 0xFF));
+				} else if (attrs->rta_type == MPTCP_ATTR_SADDR6) {
+					char buf[INET6_ADDRSTRLEN];
+
+					if (inet_ntop(AF_INET6, RTA_DATA(attrs), buf,
+						      sizeof(buf)) != NULL)
+						fprintf(stderr, ",saddr6:%s", buf);
+				} else if (attrs->rta_type == MPTCP_ATTR_DADDR4) {
+					u_int32_t daddr4 = ntohl(*(__u32 *)RTA_DATA(attrs));
+
+					fprintf(stderr, ",daddr4:%u.%u.%u.%u", daddr4 >> 24,
+					       (daddr4 >> 16) & 0xFF, (daddr4 >> 8) & 0xFF,
+					       (daddr4 & 0xFF));
+				} else if (attrs->rta_type == MPTCP_ATTR_DADDR6) {
+					char buf[INET6_ADDRSTRLEN];
+
+					if (inet_ntop(AF_INET6, RTA_DATA(attrs), buf,
+						      sizeof(buf)) != NULL)
+						fprintf(stderr, ",daddr6:%s", buf);
+				} else if (attrs->rta_type == MPTCP_ATTR_SPORT)
+					fprintf(stderr, ",sport:%u",
+						ntohs(*(__u16 *)RTA_DATA(attrs)));
+				else if (attrs->rta_type == MPTCP_ATTR_DPORT)
+					fprintf(stderr, ",dport:%u",
+						ntohs(*(__u16 *)RTA_DATA(attrs)));
+				else if (attrs->rta_type == MPTCP_ATTR_BACKUP)
+					fprintf(stderr, ",backup:%u", *(__u8 *)RTA_DATA(attrs));
+				else if (attrs->rta_type == MPTCP_ATTR_ERROR)
+					fprintf(stderr, ",error:%u", *(__u8 *)RTA_DATA(attrs));
+				else if (attrs->rta_type == MPTCP_ATTR_SERVER_SIDE)
+					fprintf(stderr, ",server_side:%u", *(__u8 *)RTA_DATA(attrs));
+
+				attrs = RTA_NEXT(attrs, msg_len);
+			}
+		}
+		fprintf(stderr, "\n");
+	} while (1);
+
+	return 0;
+}
+
+/* do a netlink command and, if max > 0, fetch the reply ; nh's size >1024B */
 static int do_nl_req(int fd, struct nlmsghdr *nh, int len, int max)
 {
 	struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
@@ -91,12 +213,16 @@ static int do_nl_req(int fd, struct nlmsghdr *nh, int len, int max)
 	int rem, ret;
 	int err = 0;
 
+	/* If no expected answer, ask for an ACK to look for errors if any */
+	if (max == 0) {
+		nh->nlmsg_flags |= NLM_F_ACK;
+		max = 1024;
+	}
+
 	nh->nlmsg_len = len;
 	ret = sendto(fd, data, len, 0, (void *)&nladdr, sizeof(nladdr));
 	if (ret != len)
 		error(1, errno, "send netlink: %uB != %uB\n", ret, len);
-	if (max == 0)
-		return 0;
 
 	addr_len = sizeof(nladdr);
 	rem = ret = recvfrom(fd, data, max, 0, (void *)&nladdr, &addr_len);
@@ -105,21 +231,29 @@ static int do_nl_req(int fd, struct nlmsghdr *nh, int len, int max)
 
 	/* Beware: the NLMSG_NEXT macro updates the 'rem' argument */
 	for (; NLMSG_OK(nh, rem); nh = NLMSG_NEXT(nh, rem)) {
-		if (nh->nlmsg_type == NLMSG_ERROR) {
-			nl_error(nh);
+		if (nh->nlmsg_type == NLMSG_DONE)
+			break;
+
+		if (nh->nlmsg_type == NLMSG_ERROR && nl_error(nh))
 			err = 1;
-		}
 	}
 	if (err)
 		error(1, 0, "bailing out due to netlink error[s]");
 	return ret;
 }
 
-static int genl_parse_getfamily(struct nlmsghdr *nlh)
+static int genl_parse_getfamily(struct nlmsghdr *nlh, int *pm_family,
+				int *events_mcast_grp)
 {
 	struct genlmsghdr *ghdr = NLMSG_DATA(nlh);
 	int len = nlh->nlmsg_len;
 	struct rtattr *attrs;
+	struct rtattr *grps;
+	struct rtattr *grp;
+	int got_events_grp;
+	int got_family;
+	int grps_len;
+	int grp_len;
 
 	if (nlh->nlmsg_type != GENL_ID_CTRL)
 		error(1, errno, "Not a controller message, len=%d type=0x%x\n",
@@ -134,9 +268,42 @@ static int genl_parse_getfamily(struct nlmsghdr *nlh)
 		error(1, errno, "Unknown controller command %d\n", ghdr->cmd);
 
 	attrs = (struct rtattr *) ((char *) ghdr + GENL_HDRLEN);
+	got_family = 0;
+	got_events_grp = 0;
+
 	while (RTA_OK(attrs, len)) {
-		if (attrs->rta_type == CTRL_ATTR_FAMILY_ID)
-			return *(__u16 *)RTA_DATA(attrs);
+		if (attrs->rta_type == CTRL_ATTR_FAMILY_ID) {
+			*pm_family = *(__u16 *)RTA_DATA(attrs);
+			got_family = 1;
+		} else if (attrs->rta_type == CTRL_ATTR_MCAST_GROUPS) {
+			grps = RTA_DATA(attrs);
+			grps_len = RTA_PAYLOAD(attrs);
+
+			while (RTA_OK(grps, grps_len)) {
+				grp = RTA_DATA(grps);
+				grp_len = RTA_PAYLOAD(grps);
+				got_events_grp = 0;
+
+				while (RTA_OK(grp, grp_len)) {
+					if (grp->rta_type == CTRL_ATTR_MCAST_GRP_ID)
+						*events_mcast_grp = *(__u32 *)RTA_DATA(grp);
+					else if (grp->rta_type == CTRL_ATTR_MCAST_GRP_NAME &&
+						 !strcmp(RTA_DATA(grp), MPTCP_PM_EVENTS))
+						got_events_grp = 1;
+
+					grp = RTA_NEXT(grp, grp_len);
+				}
+
+				if (got_events_grp)
+					break;
+
+				grps = RTA_NEXT(grps, grps_len);
+			}
+		}
+
+		if (got_family && got_events_grp)
+			return 0;
+
 		attrs = RTA_NEXT(attrs, len);
 	}
 
@@ -144,7 +311,7 @@ static int genl_parse_getfamily(struct nlmsghdr *nlh)
 	return -1;
 }
 
-static int resolve_mptcp_pm_netlink(int fd)
+static int resolve_mptcp_pm_netlink(int fd, int *pm_family, int *events_mcast_grp)
 {
 	char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
 		  NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
@@ -166,7 +333,429 @@ static int resolve_mptcp_pm_netlink(int fd)
 	off += NLMSG_ALIGN(rta->rta_len);
 
 	do_nl_req(fd, nh, off, sizeof(data));
-	return genl_parse_getfamily((void *)data);
+	return genl_parse_getfamily((void *)data, pm_family, events_mcast_grp);
+}
+
+int dsf(int fd, int pm_family, int argc, char *argv[])
+{
+	char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+		  NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
+		  1024];
+	struct rtattr *rta, *addr;
+	u_int16_t family, port;
+	struct nlmsghdr *nh;
+	u_int32_t token;
+	int addr_start;
+	int off = 0;
+	int arg;
+
+	const char *params[5];
+
+	memset(params, 0, 5 * sizeof(const char *));
+
+	memset(data, 0, sizeof(data));
+	nh = (void *)data;
+	off = init_genl_req(data, pm_family, MPTCP_PM_CMD_SUBFLOW_DESTROY,
+			    MPTCP_PM_VER);
+
+	if (argc < 12)
+		syntax(argv);
+
+	/* Params recorded in this order:
+	 * <local-ip>, <local-port>, <remote-ip>, <remote-port>, <token>
+	 */
+	for (arg = 2; arg < argc; arg++) {
+		if (!strcmp(argv[arg], "lip")) {
+			if (++arg >= argc)
+				error(1, 0, " missing local IP");
+
+			params[0] = argv[arg];
+		} else if (!strcmp(argv[arg], "lport")) {
+			if (++arg >= argc)
+				error(1, 0, " missing local port");
+
+			params[1] = argv[arg];
+		} else if (!strcmp(argv[arg], "rip")) {
+			if (++arg >= argc)
+				error(1, 0, " missing remote IP");
+
+			params[2] = argv[arg];
+		} else if (!strcmp(argv[arg], "rport")) {
+			if (++arg >= argc)
+				error(1, 0, " missing remote port");
+
+			params[3] = argv[arg];
+		} else if (!strcmp(argv[arg], "token")) {
+			if (++arg >= argc)
+				error(1, 0, " missing token");
+
+			params[4] = argv[arg];
+		} else
+			error(1, 0, "unknown keyword %s", argv[arg]);
+	}
+
+	for (arg = 0; arg < 4; arg = arg + 2) {
+		/*  addr header */
+		addr_start = off;
+		addr = (void *)(data + off);
+		addr->rta_type = NLA_F_NESTED |
+			((arg == 0) ? MPTCP_PM_ATTR_ADDR : MPTCP_PM_ATTR_ADDR_REMOTE);
+		addr->rta_len = RTA_LENGTH(0);
+		off += NLMSG_ALIGN(addr->rta_len);
+
+		/*  addr data */
+		rta = (void *)(data + off);
+		if (inet_pton(AF_INET, params[arg], RTA_DATA(rta))) {
+			family = AF_INET;
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4;
+			rta->rta_len = RTA_LENGTH(4);
+		} else if (inet_pton(AF_INET6, params[arg], RTA_DATA(rta))) {
+			family = AF_INET6;
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6;
+			rta->rta_len = RTA_LENGTH(16);
+		} else
+			error(1, errno, "can't parse ip %s", params[arg]);
+		off += NLMSG_ALIGN(rta->rta_len);
+
+		/* family */
+		rta = (void *)(data + off);
+		rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY;
+		rta->rta_len = RTA_LENGTH(2);
+		memcpy(RTA_DATA(rta), &family, 2);
+		off += NLMSG_ALIGN(rta->rta_len);
+
+		/*  port */
+		port = atoi(params[arg + 1]);
+		rta = (void *)(data + off);
+		rta->rta_type = MPTCP_PM_ADDR_ATTR_PORT;
+		rta->rta_len = RTA_LENGTH(2);
+		memcpy(RTA_DATA(rta), &port, 2);
+		off += NLMSG_ALIGN(rta->rta_len);
+
+		addr->rta_len = off - addr_start;
+	}
+
+	/* token */
+	token = strtoul(params[4], NULL, 10);
+	rta = (void *)(data + off);
+	rta->rta_type = MPTCP_PM_ATTR_TOKEN;
+	rta->rta_len = RTA_LENGTH(4);
+	memcpy(RTA_DATA(rta), &token, 4);
+	off += NLMSG_ALIGN(rta->rta_len);
+
+	do_nl_req(fd, nh, off, 0);
+
+	return 0;
+}
+
+int csf(int fd, int pm_family, int argc, char *argv[])
+{
+	char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+		  NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
+		  1024];
+	u_int32_t flags = MPTCP_PM_ADDR_FLAG_SUBFLOW;
+	const char *params[5];
+	struct nlmsghdr *nh;
+	struct rtattr *addr;
+	struct rtattr *rta;
+	u_int16_t family;
+	u_int32_t token;
+	u_int16_t port;
+	int addr_start;
+	u_int8_t id;
+	int off = 0;
+	int arg;
+
+	memset(params, 0, 5 * sizeof(const char *));
+
+	memset(data, 0, sizeof(data));
+	nh = (void *)data;
+	off = init_genl_req(data, pm_family, MPTCP_PM_CMD_SUBFLOW_CREATE,
+			    MPTCP_PM_VER);
+
+	if (argc < 12)
+		syntax(argv);
+
+	/* Params recorded in this order:
+	 * <local-ip>, <local-id>, <remote-ip>, <remote-port>, <token>
+	 */
+	for (arg = 2; arg < argc; arg++) {
+		if (!strcmp(argv[arg], "lip")) {
+			if (++arg >= argc)
+				error(1, 0, " missing local IP");
+
+			params[0] = argv[arg];
+		} else if (!strcmp(argv[arg], "lid")) {
+			if (++arg >= argc)
+				error(1, 0, " missing local id");
+
+			params[1] = argv[arg];
+		} else if (!strcmp(argv[arg], "rip")) {
+			if (++arg >= argc)
+				error(1, 0, " missing remote ip");
+
+			params[2] = argv[arg];
+		} else if (!strcmp(argv[arg], "rport")) {
+			if (++arg >= argc)
+				error(1, 0, " missing remote port");
+
+			params[3] = argv[arg];
+		} else if (!strcmp(argv[arg], "token")) {
+			if (++arg >= argc)
+				error(1, 0, " missing token");
+
+			params[4] = argv[arg];
+		} else
+			error(1, 0, "unknown param %s", argv[arg]);
+	}
+
+	for (arg = 0; arg < 4; arg = arg + 2) {
+		/*  addr header */
+		addr_start = off;
+		addr = (void *)(data + off);
+		addr->rta_type = NLA_F_NESTED |
+			((arg == 0) ? MPTCP_PM_ATTR_ADDR : MPTCP_PM_ATTR_ADDR_REMOTE);
+		addr->rta_len = RTA_LENGTH(0);
+		off += NLMSG_ALIGN(addr->rta_len);
+
+		/*  addr data */
+		rta = (void *)(data + off);
+		if (inet_pton(AF_INET, params[arg], RTA_DATA(rta))) {
+			family = AF_INET;
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4;
+			rta->rta_len = RTA_LENGTH(4);
+		} else if (inet_pton(AF_INET6, params[arg], RTA_DATA(rta))) {
+			family = AF_INET6;
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6;
+			rta->rta_len = RTA_LENGTH(16);
+		} else
+			error(1, errno, "can't parse ip %s", params[arg]);
+		off += NLMSG_ALIGN(rta->rta_len);
+
+		/* family */
+		rta = (void *)(data + off);
+		rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY;
+		rta->rta_len = RTA_LENGTH(2);
+		memcpy(RTA_DATA(rta), &family, 2);
+		off += NLMSG_ALIGN(rta->rta_len);
+
+		if (arg == 2) {
+			/*  port */
+			port = atoi(params[arg + 1]);
+			rta = (void *)(data + off);
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_PORT;
+			rta->rta_len = RTA_LENGTH(2);
+			memcpy(RTA_DATA(rta), &port, 2);
+			off += NLMSG_ALIGN(rta->rta_len);
+		}
+
+		if (arg == 0) {
+			/* id */
+			id = atoi(params[arg + 1]);
+			rta = (void *)(data + off);
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_ID;
+			rta->rta_len = RTA_LENGTH(1);
+			memcpy(RTA_DATA(rta), &id, 1);
+			off += NLMSG_ALIGN(rta->rta_len);
+		}
+
+		/* addr flags */
+		rta = (void *)(data + off);
+		rta->rta_type = MPTCP_PM_ADDR_ATTR_FLAGS;
+		rta->rta_len = RTA_LENGTH(4);
+		memcpy(RTA_DATA(rta), &flags, 4);
+		off += NLMSG_ALIGN(rta->rta_len);
+
+		addr->rta_len = off - addr_start;
+	}
+
+	/* token */
+	token = strtoul(params[4], NULL, 10);
+	rta = (void *)(data + off);
+	rta->rta_type = MPTCP_PM_ATTR_TOKEN;
+	rta->rta_len = RTA_LENGTH(4);
+	memcpy(RTA_DATA(rta), &token, 4);
+	off += NLMSG_ALIGN(rta->rta_len);
+
+	do_nl_req(fd, nh, off, 0);
+
+	return 0;
+}
+
+int remove_addr(int fd, int pm_family, int argc, char *argv[])
+{
+	char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+		  NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
+		  1024];
+	struct nlmsghdr *nh;
+	struct rtattr *rta;
+	u_int32_t token;
+	u_int8_t id;
+	int off = 0;
+	int arg;
+
+	memset(data, 0, sizeof(data));
+	nh = (void *)data;
+	off = init_genl_req(data, pm_family, MPTCP_PM_CMD_REMOVE,
+			    MPTCP_PM_VER);
+
+	if (argc < 6)
+		syntax(argv);
+
+	for (arg = 2; arg < argc; arg++) {
+		if (!strcmp(argv[arg], "id")) {
+			if (++arg >= argc)
+				error(1, 0, " missing id value");
+
+			id = atoi(argv[arg]);
+			rta = (void *)(data + off);
+			rta->rta_type = MPTCP_PM_ATTR_LOC_ID;
+			rta->rta_len = RTA_LENGTH(1);
+			memcpy(RTA_DATA(rta), &id, 1);
+			off += NLMSG_ALIGN(rta->rta_len);
+		} else if (!strcmp(argv[arg], "token")) {
+			if (++arg >= argc)
+				error(1, 0, " missing token value");
+
+			token = strtoul(argv[arg], NULL, 10);
+			rta = (void *)(data + off);
+			rta->rta_type = MPTCP_PM_ATTR_TOKEN;
+			rta->rta_len = RTA_LENGTH(4);
+			memcpy(RTA_DATA(rta), &token, 4);
+			off += NLMSG_ALIGN(rta->rta_len);
+		} else
+			error(1, 0, "unknown keyword %s", argv[arg]);
+	}
+
+	do_nl_req(fd, nh, off, 0);
+	return 0;
+}
+
+int announce_addr(int fd, int pm_family, int argc, char *argv[])
+{
+	char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+		  NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
+		  1024];
+	u_int32_t flags = MPTCP_PM_ADDR_FLAG_SIGNAL;
+	u_int32_t token = UINT_MAX;
+	struct rtattr *rta, *addr;
+	u_int32_t id = UINT_MAX;
+	struct nlmsghdr *nh;
+	u_int16_t family;
+	int addr_start;
+	int off = 0;
+	int arg;
+
+	memset(data, 0, sizeof(data));
+	nh = (void *)data;
+	off = init_genl_req(data, pm_family, MPTCP_PM_CMD_ANNOUNCE,
+			    MPTCP_PM_VER);
+
+	if (argc < 7)
+		syntax(argv);
+
+	/* local-ip header */
+	addr_start = off;
+	addr = (void *)(data + off);
+	addr->rta_type = NLA_F_NESTED | MPTCP_PM_ATTR_ADDR;
+	addr->rta_len = RTA_LENGTH(0);
+	off += NLMSG_ALIGN(addr->rta_len);
+
+	/* local-ip data */
+	/* record addr type */
+	rta = (void *)(data + off);
+	if (inet_pton(AF_INET, argv[2], RTA_DATA(rta))) {
+		family = AF_INET;
+		rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4;
+		rta->rta_len = RTA_LENGTH(4);
+	} else if (inet_pton(AF_INET6, argv[2], RTA_DATA(rta))) {
+		family = AF_INET6;
+		rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6;
+		rta->rta_len = RTA_LENGTH(16);
+	} else
+		error(1, errno, "can't parse ip %s", argv[2]);
+	off += NLMSG_ALIGN(rta->rta_len);
+
+	/* addr family */
+	rta = (void *)(data + off);
+	rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY;
+	rta->rta_len = RTA_LENGTH(2);
+	memcpy(RTA_DATA(rta), &family, 2);
+	off += NLMSG_ALIGN(rta->rta_len);
+
+	for (arg = 3; arg < argc; arg++) {
+		if (!strcmp(argv[arg], "id")) {
+			/* local-id */
+			if (++arg >= argc)
+				error(1, 0, " missing id value");
+
+			id = atoi(argv[arg]);
+			rta = (void *)(data + off);
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_ID;
+			rta->rta_len = RTA_LENGTH(1);
+			memcpy(RTA_DATA(rta), &id, 1);
+			off += NLMSG_ALIGN(rta->rta_len);
+		} else if (!strcmp(argv[arg], "dev")) {
+			/* for the if_index */
+			int32_t ifindex;
+
+			if (++arg >= argc)
+				error(1, 0, " missing dev name");
+
+			ifindex = if_nametoindex(argv[arg]);
+			if (!ifindex)
+				error(1, errno, "unknown device %s", argv[arg]);
+
+			rta = (void *)(data + off);
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_IF_IDX;
+			rta->rta_len = RTA_LENGTH(4);
+			memcpy(RTA_DATA(rta), &ifindex, 4);
+			off += NLMSG_ALIGN(rta->rta_len);
+		} else if (!strcmp(argv[arg], "port")) {
+			/* local-port (optional) */
+			u_int16_t port;
+
+			if (++arg >= argc)
+				error(1, 0, " missing port value");
+
+			port = atoi(argv[arg]);
+			rta = (void *)(data + off);
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_PORT;
+			rta->rta_len = RTA_LENGTH(2);
+			memcpy(RTA_DATA(rta), &port, 2);
+			off += NLMSG_ALIGN(rta->rta_len);
+		} else if (!strcmp(argv[arg], "token")) {
+			/* MPTCP connection token */
+			if (++arg >= argc)
+				error(1, 0, " missing token value");
+
+			token = strtoul(argv[arg], NULL, 10);
+		} else
+			error(1, 0, "unknown keyword %s", argv[arg]);
+	}
+
+	/* addr flags */
+	rta = (void *)(data + off);
+	rta->rta_type = MPTCP_PM_ADDR_ATTR_FLAGS;
+	rta->rta_len = RTA_LENGTH(4);
+	memcpy(RTA_DATA(rta), &flags, 4);
+	off += NLMSG_ALIGN(rta->rta_len);
+
+	addr->rta_len = off - addr_start;
+
+	if (id == UINT_MAX || token == UINT_MAX)
+		error(1, 0, " missing mandatory inputs");
+
+	/* token */
+	rta = (void *)(data + off);
+	rta->rta_type = MPTCP_PM_ATTR_TOKEN;
+	rta->rta_len = RTA_LENGTH(4);
+	memcpy(RTA_DATA(rta), &token, 4);
+	off += NLMSG_ALIGN(rta->rta_len);
+
+	do_nl_req(fd, nh, off, 0);
+
+	return 0;
 }
 
 int add_addr(int fd, int pm_family, int argc, char *argv[])
@@ -176,8 +765,8 @@ int add_addr(int fd, int pm_family, int argc, char *argv[])
 		  1024];
 	struct rtattr *rta, *nest;
 	struct nlmsghdr *nh;
+	u_int32_t flags = 0;
 	u_int16_t family;
-	u_int32_t flags;
 	int nest_start;
 	u_int8_t id;
 	int off = 0;
@@ -223,7 +812,6 @@ int add_addr(int fd, int pm_family, int argc, char *argv[])
 			char *tok, *str;
 
 			/* flags */
-			flags = 0;
 			if (++arg >= argc)
 				error(1, 0, " missing flags value");
 
@@ -236,11 +824,18 @@ int add_addr(int fd, int pm_family, int argc, char *argv[])
 					flags |= MPTCP_PM_ADDR_FLAG_SIGNAL;
 				else if (!strcmp(tok, "backup"))
 					flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
+				else if (!strcmp(tok, "fullmesh"))
+					flags |= MPTCP_PM_ADDR_FLAG_FULLMESH;
 				else
 					error(1, errno,
 					      "unknown flag %s", argv[arg]);
 			}
 
+			if (flags & MPTCP_PM_ADDR_FLAG_SIGNAL &&
+			    flags & MPTCP_PM_ADDR_FLAG_FULLMESH) {
+				error(1, errno, "error flag fullmesh");
+			}
+
 			rta = (void *)(data + off);
 			rta->rta_type = MPTCP_PM_ADDR_ATTR_FLAGS;
 			rta->rta_len = RTA_LENGTH(4);
@@ -271,6 +866,20 @@ int add_addr(int fd, int pm_family, int argc, char *argv[])
 			rta->rta_len = RTA_LENGTH(4);
 			memcpy(RTA_DATA(rta), &ifindex, 4);
 			off += NLMSG_ALIGN(rta->rta_len);
+		} else if (!strcmp(argv[arg], "port")) {
+			u_int16_t port;
+
+			if (++arg >= argc)
+				error(1, 0, " missing port value");
+			if (!(flags & MPTCP_PM_ADDR_FLAG_SIGNAL))
+				error(1, 0, " flags must be signal when using port");
+
+			port = atoi(argv[arg]);
+			rta = (void *)(data + off);
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_PORT;
+			rta->rta_len = RTA_LENGTH(2);
+			memcpy(RTA_DATA(rta), &port, 2);
+			off += NLMSG_ALIGN(rta->rta_len);
 		} else
 			error(1, 0, "unknown keyword %s", argv[arg]);
 	}
@@ -287,6 +896,7 @@ int del_addr(int fd, int pm_family, int argc, char *argv[])
 		  1024];
 	struct rtattr *rta, *nest;
 	struct nlmsghdr *nh;
+	u_int16_t family;
 	int nest_start;
 	u_int8_t id;
 	int off = 0;
@@ -296,11 +906,14 @@ int del_addr(int fd, int pm_family, int argc, char *argv[])
 	off = init_genl_req(data, pm_family, MPTCP_PM_CMD_DEL_ADDR,
 			    MPTCP_PM_VER);
 
-	/* the only argument is the address id */
-	if (argc != 3)
+	/* the only argument is the address id (nonzero) */
+	if (argc != 3 && argc != 4)
 		syntax(argv);
 
 	id = atoi(argv[2]);
+	/* zero id with the IP address */
+	if (!id && argc != 4)
+		syntax(argv);
 
 	nest_start = off;
 	nest = (void *)(data + off);
@@ -314,6 +927,30 @@ int del_addr(int fd, int pm_family, int argc, char *argv[])
 	rta->rta_len = RTA_LENGTH(1);
 	memcpy(RTA_DATA(rta), &id, 1);
 	off += NLMSG_ALIGN(rta->rta_len);
+
+	if (!id) {
+		/* addr data */
+		rta = (void *)(data + off);
+		if (inet_pton(AF_INET, argv[3], RTA_DATA(rta))) {
+			family = AF_INET;
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4;
+			rta->rta_len = RTA_LENGTH(4);
+		} else if (inet_pton(AF_INET6, argv[3], RTA_DATA(rta))) {
+			family = AF_INET6;
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6;
+			rta->rta_len = RTA_LENGTH(16);
+		} else {
+			error(1, errno, "can't parse ip %s", argv[3]);
+		}
+		off += NLMSG_ALIGN(rta->rta_len);
+
+		/* family */
+		rta = (void *)(data + off);
+		rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY;
+		rta->rta_len = RTA_LENGTH(2);
+		memcpy(RTA_DATA(rta), &family, 2);
+		off += NLMSG_ALIGN(rta->rta_len);
+	}
 	nest->rta_len = off - nest_start;
 
 	do_nl_req(fd, nh, off, 0);
@@ -323,6 +960,7 @@ int del_addr(int fd, int pm_family, int argc, char *argv[])
 static void print_addr(struct rtattr *attrs, int len)
 {
 	uint16_t family = 0;
+	uint16_t port = 0;
 	char str[1024];
 	uint32_t flags;
 	uint8_t id;
@@ -330,12 +968,16 @@ static void print_addr(struct rtattr *attrs, int len)
 	while (RTA_OK(attrs, len)) {
 		if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_FAMILY)
 			memcpy(&family, RTA_DATA(attrs), 2);
+		if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_PORT)
+			memcpy(&port, RTA_DATA(attrs), 2);
 		if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_ADDR4) {
 			if (family != AF_INET)
 				error(1, errno, "wrong IP (v4) for family %d",
 				      family);
 			inet_ntop(AF_INET, RTA_DATA(attrs), str, sizeof(str));
 			printf("%s", str);
+			if (port)
+				printf(" %d", port);
 		}
 		if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_ADDR6) {
 			if (family != AF_INET6)
@@ -343,6 +985,8 @@ static void print_addr(struct rtattr *attrs, int len)
 				      family);
 			inet_ntop(AF_INET6, RTA_DATA(attrs), str, sizeof(str));
 			printf("%s", str);
+			if (port)
+				printf(" %d", port);
 		}
 		if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_ID) {
 			memcpy(&id, RTA_DATA(attrs), 1);
@@ -373,6 +1017,20 @@ static void print_addr(struct rtattr *attrs, int len)
 					printf(",");
 			}
 
+			if (flags & MPTCP_PM_ADDR_FLAG_FULLMESH) {
+				printf("fullmesh");
+				flags &= ~MPTCP_PM_ADDR_FLAG_FULLMESH;
+				if (flags)
+					printf(",");
+			}
+
+			if (flags & MPTCP_PM_ADDR_FLAG_IMPLICIT) {
+				printf("implicit");
+				flags &= ~MPTCP_PM_ADDR_FLAG_IMPLICIT;
+				if (flags)
+					printf(",");
+			}
+
 			/* bump unknown flags, if any */
 			if (flags)
 				printf("0x%x", flags);
@@ -429,6 +1087,7 @@ int get_addr(int fd, int pm_family, int argc, char *argv[])
 		  1024];
 	struct rtattr *rta, *nest;
 	struct nlmsghdr *nh;
+	u_int32_t token = 0;
 	int nest_start;
 	u_int8_t id;
 	int off = 0;
@@ -439,10 +1098,12 @@ int get_addr(int fd, int pm_family, int argc, char *argv[])
 			    MPTCP_PM_VER);
 
 	/* the only argument is the address id */
-	if (argc != 3)
+	if (argc != 3 && argc != 5)
 		syntax(argv);
 
 	id = atoi(argv[2]);
+	if (argc == 5 && !strcmp(argv[3], "token"))
+		token = strtoul(argv[4], NULL, 10);
 
 	nest_start = off;
 	nest = (void *)(data + off);
@@ -458,6 +1119,15 @@ int get_addr(int fd, int pm_family, int argc, char *argv[])
 	off += NLMSG_ALIGN(rta->rta_len);
 	nest->rta_len = off - nest_start;
 
+	/* token */
+	if (token) {
+		rta = (void *)(data + off);
+		rta->rta_type = MPTCP_PM_ATTR_TOKEN;
+		rta->rta_len = RTA_LENGTH(4);
+		memcpy(RTA_DATA(rta), &token, 4);
+		off += NLMSG_ALIGN(rta->rta_len);
+	}
+
 	print_addrs(nh, pm_family, do_nl_req(fd, nh, off, sizeof(data)));
 	return 0;
 }
@@ -469,8 +1139,16 @@ int dump_addrs(int fd, int pm_family, int argc, char *argv[])
 		  1024];
 	pid_t pid = getpid();
 	struct nlmsghdr *nh;
+	u_int32_t token = 0;
+	struct rtattr *rta;
 	int off = 0;
 
+	if (argc != 2 && argc != 4)
+		syntax(argv);
+
+	if (argc == 4 && !strcmp(argv[2], "token"))
+		token = strtoul(argv[3], NULL, 10);
+
 	memset(data, 0, sizeof(data));
 	nh = (void *)data;
 	off = init_genl_req(data, pm_family, MPTCP_PM_CMD_GET_ADDR,
@@ -480,6 +1158,15 @@ int dump_addrs(int fd, int pm_family, int argc, char *argv[])
 	nh->nlmsg_pid = pid;
 	nh->nlmsg_len = off;
 
+	/* token */
+	if (token) {
+		rta = (void *)(data + off);
+		rta->rta_type = MPTCP_PM_ATTR_TOKEN;
+		rta->rta_len = RTA_LENGTH(4);
+		memcpy(RTA_DATA(rta), &token, 4);
+		off += NLMSG_ALIGN(rta->rta_len);
+	}
+
 	print_addrs(nh, pm_family, do_nl_req(fd, nh, off, sizeof(data)));
 	return 0;
 }
@@ -584,9 +1271,245 @@ int get_set_limits(int fd, int pm_family, int argc, char *argv[])
 	return 0;
 }
 
+int add_listener(int argc, char *argv[])
+{
+	struct sockaddr_storage addr;
+	struct sockaddr_in6 *a6;
+	struct sockaddr_in *a4;
+	u_int16_t family;
+	int enable = 1;
+	int sock;
+	int err;
+
+	if (argc < 4)
+		syntax(argv);
+
+	memset(&addr, 0, sizeof(struct sockaddr_storage));
+	a4 = (struct sockaddr_in *)&addr;
+	a6 = (struct sockaddr_in6 *)&addr;
+
+	if (inet_pton(AF_INET, argv[2], &a4->sin_addr)) {
+		family = AF_INET;
+		a4->sin_family = family;
+		a4->sin_port = htons(atoi(argv[3]));
+	} else if (inet_pton(AF_INET6, argv[2], &a6->sin6_addr)) {
+		family = AF_INET6;
+		a6->sin6_family = family;
+		a6->sin6_port = htons(atoi(argv[3]));
+	} else
+		error(1, errno, "can't parse ip %s", argv[2]);
+
+	sock = socket(family, SOCK_STREAM, IPPROTO_MPTCP);
+	if (sock < 0)
+		error(1, errno, "can't create listener sock\n");
+
+	if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(enable))) {
+		close(sock);
+		error(1, errno, "can't set SO_REUSEADDR on listener sock\n");
+	}
+
+	err = bind(sock, (struct sockaddr *)&addr,
+		   ((family == AF_INET) ? sizeof(struct sockaddr_in) :
+		    sizeof(struct sockaddr_in6)));
+
+	if (err == 0 && listen(sock, 30) == 0)
+		pause();
+
+	close(sock);
+	return 0;
+}
+
+int set_flags(int fd, int pm_family, int argc, char *argv[])
+{
+	char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+		  NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
+		  1024];
+	struct rtattr *rta, *nest;
+	struct nlmsghdr *nh;
+	u_int32_t flags = 0;
+	u_int32_t token = 0;
+	u_int16_t rport = 0;
+	u_int16_t family;
+	void *rip = NULL;
+	int nest_start;
+	int use_id = 0;
+	u_int8_t id;
+	int off = 0;
+	int arg = 2;
+
+	memset(data, 0, sizeof(data));
+	nh = (void *)data;
+	off = init_genl_req(data, pm_family, MPTCP_PM_CMD_SET_FLAGS,
+			    MPTCP_PM_VER);
+
+	if (argc < 3)
+		syntax(argv);
+
+	nest_start = off;
+	nest = (void *)(data + off);
+	nest->rta_type = NLA_F_NESTED | MPTCP_PM_ATTR_ADDR;
+	nest->rta_len = RTA_LENGTH(0);
+	off += NLMSG_ALIGN(nest->rta_len);
+
+	if (!strcmp(argv[arg], "id")) {
+		if (++arg >= argc)
+			error(1, 0, " missing id value");
+
+		use_id = 1;
+		id = atoi(argv[arg]);
+		rta = (void *)(data + off);
+		rta->rta_type = MPTCP_PM_ADDR_ATTR_ID;
+		rta->rta_len = RTA_LENGTH(1);
+		memcpy(RTA_DATA(rta), &id, 1);
+		off += NLMSG_ALIGN(rta->rta_len);
+	} else {
+		/* addr data */
+		rta = (void *)(data + off);
+		if (inet_pton(AF_INET, argv[arg], RTA_DATA(rta))) {
+			family = AF_INET;
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4;
+			rta->rta_len = RTA_LENGTH(4);
+		} else if (inet_pton(AF_INET6, argv[arg], RTA_DATA(rta))) {
+			family = AF_INET6;
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6;
+			rta->rta_len = RTA_LENGTH(16);
+		} else {
+			error(1, errno, "can't parse ip %s", argv[arg]);
+		}
+		off += NLMSG_ALIGN(rta->rta_len);
+
+		/* family */
+		rta = (void *)(data + off);
+		rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY;
+		rta->rta_len = RTA_LENGTH(2);
+		memcpy(RTA_DATA(rta), &family, 2);
+		off += NLMSG_ALIGN(rta->rta_len);
+	}
+
+	if (++arg >= argc)
+		error(1, 0, " missing flags keyword");
+
+	for (; arg < argc; arg++) {
+		if (!strcmp(argv[arg], "token")) {
+			if (++arg >= argc)
+				error(1, 0, " missing token value");
+
+			/* token */
+			token = strtoul(argv[arg], NULL, 10);
+		} else if (!strcmp(argv[arg], "flags")) {
+			char *tok, *str;
+
+			/* flags */
+			if (++arg >= argc)
+				error(1, 0, " missing flags value");
+
+			for (str = argv[arg]; (tok = strtok(str, ","));
+			     str = NULL) {
+				if (!strcmp(tok, "backup"))
+					flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
+				else if (!strcmp(tok, "fullmesh"))
+					flags |= MPTCP_PM_ADDR_FLAG_FULLMESH;
+				else if (strcmp(tok, "nobackup") &&
+					 strcmp(tok, "nofullmesh"))
+					error(1, errno,
+					      "unknown flag %s", argv[arg]);
+			}
+
+			rta = (void *)(data + off);
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_FLAGS;
+			rta->rta_len = RTA_LENGTH(4);
+			memcpy(RTA_DATA(rta), &flags, 4);
+			off += NLMSG_ALIGN(rta->rta_len);
+		} else if (!strcmp(argv[arg], "port")) {
+			u_int16_t port;
+
+			if (use_id)
+				error(1, 0, " port can't be used with id");
+
+			if (++arg >= argc)
+				error(1, 0, " missing port value");
+
+			port = atoi(argv[arg]);
+			rta = (void *)(data + off);
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_PORT;
+			rta->rta_len = RTA_LENGTH(2);
+			memcpy(RTA_DATA(rta), &port, 2);
+			off += NLMSG_ALIGN(rta->rta_len);
+		} else if (!strcmp(argv[arg], "rport")) {
+			if (++arg >= argc)
+				error(1, 0, " missing remote port");
+
+			rport = atoi(argv[arg]);
+		} else if (!strcmp(argv[arg], "rip")) {
+			if (++arg >= argc)
+				error(1, 0, " missing remote ip");
+
+			rip = argv[arg];
+		} else {
+			error(1, 0, "unknown keyword %s", argv[arg]);
+		}
+	}
+	nest->rta_len = off - nest_start;
+
+	/* token */
+	if (token) {
+		rta = (void *)(data + off);
+		rta->rta_type = MPTCP_PM_ATTR_TOKEN;
+		rta->rta_len = RTA_LENGTH(4);
+		memcpy(RTA_DATA(rta), &token, 4);
+		off += NLMSG_ALIGN(rta->rta_len);
+	}
+
+	/* remote addr/port */
+	if (rip) {
+		nest_start = off;
+		nest = (void *)(data + off);
+		nest->rta_type = NLA_F_NESTED | MPTCP_PM_ATTR_ADDR_REMOTE;
+		nest->rta_len = RTA_LENGTH(0);
+		off += NLMSG_ALIGN(nest->rta_len);
+
+		/* addr data */
+		rta = (void *)(data + off);
+		if (inet_pton(AF_INET, rip, RTA_DATA(rta))) {
+			family = AF_INET;
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4;
+			rta->rta_len = RTA_LENGTH(4);
+		} else if (inet_pton(AF_INET6, rip, RTA_DATA(rta))) {
+			family = AF_INET6;
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6;
+			rta->rta_len = RTA_LENGTH(16);
+		} else {
+			error(1, errno, "can't parse ip %s", (char *)rip);
+		}
+		off += NLMSG_ALIGN(rta->rta_len);
+
+		/* family */
+		rta = (void *)(data + off);
+		rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY;
+		rta->rta_len = RTA_LENGTH(2);
+		memcpy(RTA_DATA(rta), &family, 2);
+		off += NLMSG_ALIGN(rta->rta_len);
+
+		if (rport) {
+			rta = (void *)(data + off);
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_PORT;
+			rta->rta_len = RTA_LENGTH(2);
+			memcpy(RTA_DATA(rta), &rport, 2);
+			off += NLMSG_ALIGN(rta->rta_len);
+		}
+
+		nest->rta_len = off - nest_start;
+	}
+
+	do_nl_req(fd, nh, off, 0);
+	return 0;
+}
+
 int main(int argc, char *argv[])
 {
-	int fd, pm_family;
+	int events_mcast_grp;
+	int pm_family;
+	int fd;
 
 	if (argc < 2)
 		syntax(argv);
@@ -595,10 +1518,18 @@ int main(int argc, char *argv[])
 	if (fd == -1)
 		error(1, errno, "socket netlink");
 
-	pm_family = resolve_mptcp_pm_netlink(fd);
+	resolve_mptcp_pm_netlink(fd, &pm_family, &events_mcast_grp);
 
 	if (!strcmp(argv[1], "add"))
 		return add_addr(fd, pm_family, argc, argv);
+	else if (!strcmp(argv[1], "ann"))
+		return announce_addr(fd, pm_family, argc, argv);
+	else if (!strcmp(argv[1], "rem"))
+		return remove_addr(fd, pm_family, argc, argv);
+	else if (!strcmp(argv[1], "csf"))
+		return csf(fd, pm_family, argc, argv);
+	else if (!strcmp(argv[1], "dsf"))
+		return dsf(fd, pm_family, argc, argv);
 	else if (!strcmp(argv[1], "del"))
 		return del_addr(fd, pm_family, argc, argv);
 	else if (!strcmp(argv[1], "flush"))
@@ -609,6 +1540,12 @@ int main(int argc, char *argv[])
 		return dump_addrs(fd, pm_family, argc, argv);
 	else if (!strcmp(argv[1], "limits"))
 		return get_set_limits(fd, pm_family, argc, argv);
+	else if (!strcmp(argv[1], "set"))
+		return set_flags(fd, pm_family, argc, argv);
+	else if (!strcmp(argv[1], "events"))
+		return capture_events(fd, events_mcast_grp);
+	else if (!strcmp(argv[1], "listen"))
+		return add_listener(argc, argv);
 
 	fprintf(stderr, "unknown sub-command: %s", argv[1]);
 	syntax(argv);
diff --git a/tools/testing/selftests/net/mptcp/settings b/tools/testing/selftests/net/mptcp/settings
index 026384c189c9..abc5648b59ab 100644
--- a/tools/testing/selftests/net/mptcp/settings
+++ b/tools/testing/selftests/net/mptcp/settings
@@ -1 +1 @@
-timeout=450
+timeout=1800
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
index 2f649b431456..1b2366220388 100755
--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -1,16 +1,30 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
-ns1="ns1-$rndh"
-ns2="ns2-$rndh"
-ns3="ns3-$rndh"
+# Double quotes to prevent globbing and word splitting is recommended in new
+# code but we accept it, especially because there were too many before having
+# address all other issues detected by shellcheck.
+#shellcheck disable=SC2086
+
+. "$(dirname "${0}")/mptcp_lib.sh"
+
+ns1=""
+ns2=""
+ns3=""
 capture=false
-ksft_skip=4
-timeout=30
-test_cnt=1
+timeout_poll=30
+timeout_test=$((timeout_poll * 2 + 1))
+# a bit more space: because we have more to display
+MPTCP_LIB_TEST_FORMAT="%02u %-60s"
 ret=0
 bail=0
+slack=50
+large=""
+small=""
+sout=""
+cout=""
+capout=""
+size=0
 
 usage() {
 	echo "Usage: $0 [ -b ] [ -c ] [ -d ]"
@@ -19,23 +33,19 @@ usage() {
 	echo -e "\t-d: debug this script"
 }
 
+# This function is used in the cleanup trap
+#shellcheck disable=SC2317
 cleanup()
 {
-	rm -f "$cin" "$cout"
-	rm -f "$sin" "$sout"
+	rm -f "$cout" "$sout"
+	rm -f "$large" "$small"
 	rm -f "$capout"
 
-	local netns
-	for netns in "$ns1" "$ns2" "$ns3";do
-		ip netns del $netns
-	done
+	mptcp_lib_ns_exit "${ns1}" "${ns2}" "${ns3}"
 }
 
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
-	echo "SKIP: Could not run test without ip tool"
-	exit $ksft_skip
-fi
+mptcp_lib_check_mptcp
+mptcp_lib_check_tools ip
 
 #  "$ns1"              ns2                    ns3
 #     ns1eth1    ns2eth1   ns2eth3      ns3eth1
@@ -50,16 +60,14 @@ setup()
 	sout=$(mktemp)
 	cout=$(mktemp)
 	capout=$(mktemp)
-	size=$((2048 * 4096))
+	size=$((2 * 2048 * 4096))
+
 	dd if=/dev/zero of=$small bs=4096 count=20 >/dev/null 2>&1
 	dd if=/dev/zero of=$large bs=4096 count=$((size / 4096)) >/dev/null 2>&1
 
 	trap cleanup EXIT
 
-	for i in "$ns1" "$ns2" "$ns3";do
-		ip netns add $i || exit $ksft_skip
-		ip -net $i link set lo up
-	done
+	mptcp_lib_ns_init ns1 ns2 ns3
 
 	ip link add ns1eth1 netns "$ns1" type veth peer name ns2eth1 netns "$ns2"
 	ip link add ns1eth2 netns "$ns1" type veth peer name ns2eth2 netns "$ns2"
@@ -79,7 +87,6 @@ setup()
 
 	ip netns exec "$ns1" ./pm_nl_ctl limits 1 1
 	ip netns exec "$ns1" ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags subflow
-	ip netns exec "$ns1" sysctl -q net.ipv4.conf.all.rp_filter=0
 
 	ip -net "$ns2" addr add 10.0.1.2/24 dev ns2eth1
 	ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad
@@ -102,23 +109,16 @@ setup()
 	ip -net "$ns3" route add default via dead:beef:3::2
 
 	ip netns exec "$ns3" ./pm_nl_ctl limits 1 1
-}
-
-# $1: ns, $2: port
-wait_local_port_listen()
-{
-	local listener_ns="${1}"
-	local port="${2}"
-
-	local port_hex i
 
-	port_hex="$(printf "%04X" "${port}")"
-	for i in $(seq 10); do
-		ip netns exec "${listener_ns}" cat /proc/net/tcp* | \
-			awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" &&
-			break
-		sleep 0.1
-	done
+	# debug build can slow down measurably the test program
+	# we use quite tight time limit on the run-time, to ensure
+	# maximum B/W usage.
+	# Use kmemleak/lockdep/kasan/prove_locking presence as a rough
+	# estimate for this being a debug kernel and increase the
+	# maximum run-time accordingly. Observed run times for CI builds
+	# running selftests, including kbuild, were used to determine the
+	# amount of time to add.
+	grep -q ' kmemleak_init$\| lockdep_init$\| kasan_init$\| prove_locking$' /proc/kallsyms && slack=$((slack+550))
 }
 
 do_transfer()
@@ -127,18 +127,15 @@ do_transfer()
 	local sin=$2
 	local max_time=$3
 	local port
-	port=$((10000+$test_cnt))
-	test_cnt=$((test_cnt+1))
+	port=$((10000+MPTCP_LIB_TEST_COUNTER))
 
 	:> "$cout"
 	:> "$sout"
 	:> "$capout"
 
-	local addr_port
-	addr_port=$(printf "%s:%d" ${connect_addr} ${port})
-
 	if $capture; then
 		local capuser
+		local rndh="${ns1:4}"
 		if [ -z $SUDO_USER ] ; then
 			capuser=""
 		else
@@ -157,14 +154,18 @@ do_transfer()
 		sleep 1
 	fi
 
-	ip netns exec ${ns3} ./mptcp_connect -jt $timeout -l -p $port 0.0.0.0 < "$sin" > "$sout" &
+	timeout ${timeout_test} \
+		ip netns exec ${ns3} \
+			./mptcp_connect -jt ${timeout_poll} -l -p $port -T $max_time \
+				0.0.0.0 < "$sin" > "$sout" &
 	local spid=$!
 
-	wait_local_port_listen "${ns3}" "${port}"
+	mptcp_lib_wait_local_port_listen "${ns3}" "${port}"
 
-	local start
-	start=$(date +%s%3N)
-	ip netns exec ${ns1} ./mptcp_connect -jt $timeout -p $port 10.0.3.3 < "$cin" > "$cout" &
+	timeout ${timeout_test} \
+		ip netns exec ${ns1} \
+			./mptcp_connect -jt ${timeout_poll} -p $port -T $max_time \
+				10.0.3.3 < "$cin" > "$cout" &
 	local cpid=$!
 
 	wait $cpid
@@ -172,33 +173,26 @@ do_transfer()
 	wait $spid
 	local rets=$?
 
-	local stop
-	stop=$(date +%s%3N)
-
 	if $capture; then
 		sleep 1
 		kill ${cappid_listener}
 		kill ${cappid_connector}
 	fi
 
-	local duration
-	duration=$((stop-start))
-
 	cmp $sin $cout > /dev/null 2>&1
 	local cmps=$?
 	cmp $cin $sout > /dev/null 2>&1
 	local cmpc=$?
 
-	printf "%16s" "$duration max $max_time "
+	printf "%-16s" " max $max_time "
 	if [ $retc -eq 0 ] && [ $rets -eq 0 ] && \
-	   [ $cmpc -eq 0 ] && [ $cmps -eq 0 ] && \
-	   [ $duration -lt $max_time ]; then
-		echo "[ OK ]"
+	   [ $cmpc -eq 0 ] && [ $cmps -eq 0 ]; then
+		mptcp_lib_pr_ok
 		cat "$capout"
 		return 0
 	fi
 
-	echo " [ fail ]"
+	mptcp_lib_pr_fail
 	echo "client exit code $retc, server $rets" 1>&2
 	echo -e "\nnetns ${ns3} socket stat for $port:" 1>&2
 	ip netns exec ${ns3} ss -nita 1>&2 -o "sport = :$port"
@@ -236,24 +230,29 @@ run_test()
 	tc -n $ns2 qdisc add dev ns2eth1 root netem rate ${rate1}mbit $delay1
 	tc -n $ns2 qdisc add dev ns2eth2 root netem rate ${rate2}mbit $delay2
 
-	# time is measure in ms
-	local time=$((size * 8 * 1000 / (( $rate1 + $rate2) * 1024 *1024) ))
+	# time is measured in ms, account for transfer size, aggregated link speed
+	# and header overhead (10%)
+	#              ms    byte -> bit   10%        mbit      -> kbit -> bit  10%
+	local time=$((1000 * size  *  8  * 10 / ((rate1 + rate2) * 1000 * 1000 * 9) ))
 
 	# mptcp_connect will do some sleeps to allow the mp_join handshake
-	# completion
-	time=$((time + 1350))
+	# completion (see mptcp_connect): 200ms on each side, add some slack
+	time=$((time + 400 + slack))
 
-	printf "%-50s" "$msg"
-	do_transfer $small $large $((time * 11 / 10))
+	mptcp_lib_print_title "$msg"
+	do_transfer $small $large $time
 	lret=$?
+	mptcp_lib_result_code "${lret}" "${msg}"
 	if [ $lret -ne 0 ]; then
 		ret=$lret
 		[ $bail -eq 0 ] || exit $ret
 	fi
 
-	printf "%-50s" "$msg - reverse direction"
-	do_transfer $large $small $((time * 11 / 10))
+	msg+=" - reverse direction"
+	mptcp_lib_print_title "${msg}"
+	do_transfer $large $small $time
 	lret=$?
+	mptcp_lib_result_code "${lret}" "${msg}"
 	if [ $lret -ne 0 ]; then
 		ret=$lret
 		[ $bail -eq 0 ] || exit $ret
@@ -264,7 +263,7 @@ while getopts "bcdh" option;do
 	case "$option" in
 	"h")
 		usage $0
-		exit 0
+		exit ${KSFT_PASS}
 		;;
 	"b")
 		bail=1
@@ -277,17 +276,19 @@ while getopts "bcdh" option;do
 		;;
 	"?")
 		usage $0
-		exit 1
+		exit ${KSFT_FAIL}
 		;;
 	esac
 done
 
 setup
 run_test 10 10 0 0 "balanced bwidth"
-run_test 10 10 1 50 "balanced bwidth with unbalanced delay"
+run_test 10 10 1 25 "balanced bwidth with unbalanced delay"
 
 # we still need some additional infrastructure to pass the following test-cases
-# run_test 30 10 0 0 "unbalanced bwidth"
-# run_test 30 10 1 50 "unbalanced bwidth with unbalanced delay"
-# run_test 30 10 50 1 "unbalanced bwidth with opposed, unbalanced delay"
+run_test 10 3 0 0 "unbalanced bwidth"
+run_test 10 3 1 25 "unbalanced bwidth with unbalanced delay"
+run_test 10 3 25 1 "unbalanced bwidth with opposed, unbalanced delay"
+
+mptcp_lib_result_print_all_tap
 exit $ret
diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh
new file mode 100755
index 000000000000..9e2981f2d7f5
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh
@@ -0,0 +1,897 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Double quotes to prevent globbing and word splitting is recommended in new
+# code but we accept it.
+#shellcheck disable=SC2086
+
+# Some variables are used below but indirectly, see verify_*_event()
+#shellcheck disable=SC2034
+
+. "$(dirname "${0}")/mptcp_lib.sh"
+
+mptcp_lib_check_mptcp
+mptcp_lib_check_kallsyms
+
+if ! mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
+	echo "userspace pm tests are not supported by the kernel: SKIP"
+	exit ${KSFT_SKIP}
+fi
+mptcp_lib_check_tools ip
+
+ANNOUNCED=${MPTCP_LIB_EVENT_ANNOUNCED}
+REMOVED=${MPTCP_LIB_EVENT_REMOVED}
+SUB_ESTABLISHED=${MPTCP_LIB_EVENT_SUB_ESTABLISHED}
+SUB_CLOSED=${MPTCP_LIB_EVENT_SUB_CLOSED}
+LISTENER_CREATED=${MPTCP_LIB_EVENT_LISTENER_CREATED}
+LISTENER_CLOSED=${MPTCP_LIB_EVENT_LISTENER_CLOSED}
+
+AF_INET=${MPTCP_LIB_AF_INET}
+AF_INET6=${MPTCP_LIB_AF_INET6}
+
+file=""
+server_evts=""
+client_evts=""
+server_evts_pid=0
+client_evts_pid=0
+client4_pid=0
+server4_pid=0
+client6_pid=0
+server6_pid=0
+client4_token=""
+server4_token=""
+client6_token=""
+server6_token=""
+client4_port=0;
+client6_port=0;
+app4_port=50002
+new4_port=50003
+app6_port=50004
+client_addr_id=${RANDOM:0:2}
+server_addr_id=${RANDOM:0:2}
+
+ns1=""
+ns2=""
+ret=0
+test_name=""
+# a bit more space: because we have more to display
+MPTCP_LIB_TEST_FORMAT="%02u %-68s"
+
+print_title()
+{
+	mptcp_lib_pr_info "${1}"
+}
+
+# $1: test name
+print_test()
+{
+	test_name="${1}"
+
+	mptcp_lib_print_title "${test_name}"
+}
+
+test_pass()
+{
+	mptcp_lib_pr_ok
+	mptcp_lib_result_pass "${test_name}"
+}
+
+test_skip()
+{
+	mptcp_lib_pr_skip
+	mptcp_lib_result_skip "${test_name}"
+}
+
+# $1: msg
+test_fail()
+{
+	if [ ${#} -gt 0 ]
+	then
+		mptcp_lib_pr_fail "${@}"
+	fi
+	ret=${KSFT_FAIL}
+	mptcp_lib_result_fail "${test_name}"
+}
+
+# This function is used in the cleanup trap
+#shellcheck disable=SC2317
+cleanup()
+{
+	print_title "Cleanup"
+
+	# Terminate the MPTCP connection and related processes
+	local pid
+	for pid in $client4_pid $server4_pid $client6_pid $server6_pid\
+		   $server_evts_pid $client_evts_pid
+	do
+		mptcp_lib_kill_wait $pid
+	done
+
+	mptcp_lib_ns_exit "${ns1}" "${ns2}"
+
+	rm -rf $file $client_evts $server_evts
+
+	mptcp_lib_pr_info "Done"
+}
+
+trap cleanup EXIT
+
+# Create and configure network namespaces for testing
+mptcp_lib_ns_init ns1 ns2
+for i in "$ns1" "$ns2" ;do
+	ip netns exec "$i" sysctl -q net.mptcp.pm_type=1
+done
+
+#  "$ns1"              ns2
+#     ns1eth2    ns2eth1
+
+ip link add ns1eth2 netns "$ns1" type veth peer name ns2eth1 netns "$ns2"
+
+# Add IPv4/v6 addresses to the namespaces
+ip -net "$ns1" addr add 10.0.1.1/24 dev ns1eth2
+ip -net "$ns1" addr add 10.0.2.1/24 dev ns1eth2
+ip -net "$ns1" addr add dead:beef:1::1/64 dev ns1eth2 nodad
+ip -net "$ns1" addr add dead:beef:2::1/64 dev ns1eth2 nodad
+ip -net "$ns1" link set ns1eth2 up
+
+ip -net "$ns2" addr add 10.0.1.2/24 dev ns2eth1
+ip -net "$ns2" addr add 10.0.2.2/24 dev ns2eth1
+ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad
+ip -net "$ns2" addr add dead:beef:2::2/64 dev ns2eth1 nodad
+ip -net "$ns2" link set ns2eth1 up
+
+file=$(mktemp)
+mptcp_lib_make_file "$file" 2 1
+
+# Capture netlink events over the two network namespaces running
+# the MPTCP client and server
+client_evts=$(mktemp)
+mptcp_lib_events "${ns2}" "${client_evts}" client_evts_pid
+server_evts=$(mktemp)
+mptcp_lib_events "${ns1}" "${server_evts}" server_evts_pid
+sleep 0.5
+
+print_title "Init"
+print_test "Created network namespaces ns1, ns2"
+test_pass
+
+make_connection()
+{
+	local is_v6=$1
+	local app_port=$app4_port
+	local connect_addr="10.0.1.1"
+	local listen_addr="0.0.0.0"
+	if [ "$is_v6" = "v6" ]
+	then
+		connect_addr="dead:beef:1::1"
+		listen_addr="::"
+		app_port=$app6_port
+	else
+		is_v6="v4"
+	fi
+
+	:>"$client_evts"
+	:>"$server_evts"
+
+	# Run the server
+	ip netns exec "$ns1" \
+	   ./mptcp_connect -s MPTCP -w 300 -p $app_port -l $listen_addr > /dev/null 2>&1 &
+	local server_pid=$!
+	sleep 0.5
+
+	# Run the client, transfer $file and stay connected to the server
+	# to conduct tests
+	ip netns exec "$ns2" \
+	   ./mptcp_connect -s MPTCP -w 300 -m sendfile -p $app_port $connect_addr\
+	   2>&1 > /dev/null < "$file" &
+	local client_pid=$!
+	sleep 1
+
+	# Capture client/server attributes from MPTCP connection netlink events
+
+	local client_token
+	local client_port
+	local client_serverside
+	local server_token
+	local server_serverside
+
+	client_token=$(mptcp_lib_evts_get_info token "$client_evts")
+	client_port=$(mptcp_lib_evts_get_info sport "$client_evts")
+	client_serverside=$(mptcp_lib_evts_get_info server_side "$client_evts")
+	server_token=$(mptcp_lib_evts_get_info token "$server_evts")
+	server_serverside=$(mptcp_lib_evts_get_info server_side "$server_evts")
+
+	print_test "Established IP${is_v6} MPTCP Connection ns2 => ns1"
+	if [ "$client_token" != "" ] && [ "$server_token" != "" ] && [ "$client_serverside" = 0 ] &&
+		   [ "$server_serverside" = 1 ]
+	then
+		test_pass
+	else
+		test_fail "Expected tokens (c:${client_token} - s:${server_token}) and server (c:${client_serverside} - s:${server_serverside})"
+		mptcp_lib_result_print_all_tap
+		exit ${KSFT_FAIL}
+	fi
+
+	if [ "$is_v6" = "v6" ]
+	then
+		client6_token=$client_token
+		server6_token=$server_token
+		client6_port=$client_port
+		client6_pid=$client_pid
+		server6_pid=$server_pid
+	else
+		client4_token=$client_token
+		server4_token=$server_token
+		client4_port=$client_port
+		client4_pid=$client_pid
+		server4_pid=$server_pid
+	fi
+}
+
+# $@: all var names to check
+check_expected()
+{
+	if mptcp_lib_check_expected "${@}"
+	then
+		test_pass
+		return 0
+	fi
+
+	test_fail
+	return 1
+}
+
+verify_announce_event()
+{
+	local evt=$1
+	local e_type=$2
+	local e_token=$3
+	local e_addr=$4
+	local e_id=$5
+	local e_dport=$6
+	local e_af=$7
+	local type
+	local token
+	local addr
+	local dport
+	local id
+
+	type=$(mptcp_lib_evts_get_info type "$evt" $e_type)
+	token=$(mptcp_lib_evts_get_info token "$evt" $e_type)
+	if [ "$e_af" = "v6" ]
+	then
+		addr=$(mptcp_lib_evts_get_info daddr6 "$evt" $e_type)
+	else
+		addr=$(mptcp_lib_evts_get_info daddr4 "$evt" $e_type)
+	fi
+	dport=$(mptcp_lib_evts_get_info dport "$evt" $e_type)
+	id=$(mptcp_lib_evts_get_info rem_id "$evt" $e_type)
+
+	check_expected "type" "token" "addr" "dport" "id"
+}
+
+test_announce()
+{
+	print_title "Announce tests"
+
+	# Capture events on the network namespace running the server
+	:>"$server_evts"
+
+	# ADD_ADDR using an invalid token should result in no action
+	local invalid_token=$(( client4_token - 1))
+	ip netns exec "$ns2" ./pm_nl_ctl ann 10.0.2.2 token $invalid_token id\
+	   $client_addr_id dev ns2eth1 > /dev/null 2>&1
+
+	local type
+	type=$(mptcp_lib_evts_get_info type "$server_evts")
+	print_test "ADD_ADDR 10.0.2.2 (ns2) => ns1, invalid token"
+	if [ "$type" = "" ]
+	then
+		test_pass
+	else
+		test_fail "type defined: ${type}"
+	fi
+
+	# ADD_ADDR from the client to server machine reusing the subflow port
+	:>"$server_evts"
+	ip netns exec "$ns2"\
+	   ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id $client_addr_id dev\
+	   ns2eth1
+	print_test "ADD_ADDR id:${client_addr_id} 10.0.2.2 (ns2) => ns1, reuse port"
+	sleep 0.5
+	verify_announce_event $server_evts $ANNOUNCED $server4_token "10.0.2.2" $client_addr_id \
+			      "$client4_port"
+
+	# ADD_ADDR6 from the client to server machine reusing the subflow port
+	:>"$server_evts"
+	ip netns exec "$ns2" ./pm_nl_ctl ann\
+	   dead:beef:2::2 token "$client6_token" id $client_addr_id dev ns2eth1
+	print_test "ADD_ADDR6 id:${client_addr_id} dead:beef:2::2 (ns2) => ns1, reuse port"
+	sleep 0.5
+	verify_announce_event "$server_evts" "$ANNOUNCED" "$server6_token" "dead:beef:2::2"\
+			      "$client_addr_id" "$client6_port" "v6"
+
+	# ADD_ADDR from the client to server machine using a new port
+	:>"$server_evts"
+	client_addr_id=$((client_addr_id+1))
+	ip netns exec "$ns2" ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id\
+	   $client_addr_id dev ns2eth1 port $new4_port
+	print_test "ADD_ADDR id:${client_addr_id} 10.0.2.2 (ns2) => ns1, new port"
+	sleep 0.5
+	verify_announce_event "$server_evts" "$ANNOUNCED" "$server4_token" "10.0.2.2"\
+			      "$client_addr_id" "$new4_port"
+
+	# Capture events on the network namespace running the client
+	:>"$client_evts"
+
+	# ADD_ADDR from the server to client machine reusing the subflow port
+	ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\
+	   $server_addr_id dev ns1eth2
+	print_test "ADD_ADDR id:${server_addr_id} 10.0.2.1 (ns1) => ns2, reuse port"
+	sleep 0.5
+	verify_announce_event "$client_evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\
+			      "$server_addr_id" "$app4_port"
+
+	# ADD_ADDR6 from the server to client machine reusing the subflow port
+	:>"$client_evts"
+	ip netns exec "$ns1" ./pm_nl_ctl ann dead:beef:2::1 token "$server6_token" id\
+	   $server_addr_id dev ns1eth2
+	print_test "ADD_ADDR6 id:${server_addr_id} dead:beef:2::1 (ns1) => ns2, reuse port"
+	sleep 0.5
+	verify_announce_event "$client_evts" "$ANNOUNCED" "$client6_token" "dead:beef:2::1"\
+			      "$server_addr_id" "$app6_port" "v6"
+
+	# ADD_ADDR from the server to client machine using a new port
+	:>"$client_evts"
+	server_addr_id=$((server_addr_id+1))
+	ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\
+	   $server_addr_id dev ns1eth2 port $new4_port
+	print_test "ADD_ADDR id:${server_addr_id} 10.0.2.1 (ns1) => ns2, new port"
+	sleep 0.5
+	verify_announce_event "$client_evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\
+			      "$server_addr_id" "$new4_port"
+}
+
+verify_remove_event()
+{
+	local evt=$1
+	local e_type=$2
+	local e_token=$3
+	local e_id=$4
+	local type
+	local token
+	local id
+
+	type=$(mptcp_lib_evts_get_info type "$evt" $e_type)
+	token=$(mptcp_lib_evts_get_info token "$evt" $e_type)
+	id=$(mptcp_lib_evts_get_info rem_id "$evt" $e_type)
+
+	check_expected "type" "token" "id"
+}
+
+test_remove()
+{
+	print_title "Remove tests"
+
+	# Capture events on the network namespace running the server
+	:>"$server_evts"
+
+	# RM_ADDR using an invalid token should result in no action
+	local invalid_token=$(( client4_token - 1 ))
+	ip netns exec "$ns2" ./pm_nl_ctl rem token $invalid_token id\
+	   $client_addr_id > /dev/null 2>&1
+	print_test "RM_ADDR id:${client_addr_id} ns2 => ns1, invalid token"
+	local type
+	type=$(mptcp_lib_evts_get_info type "$server_evts")
+	if [ "$type" = "" ]
+	then
+		test_pass
+	else
+		test_fail "unexpected type: ${type}"
+	fi
+
+	# RM_ADDR using an invalid addr id should result in no action
+	local invalid_id=$(( client_addr_id + 1 ))
+	ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\
+	   $invalid_id > /dev/null 2>&1
+	print_test "RM_ADDR id:${invalid_id} ns2 => ns1, invalid id"
+	type=$(mptcp_lib_evts_get_info type "$server_evts")
+	if [ "$type" = "" ]
+	then
+		test_pass
+	else
+		test_fail "unexpected type: ${type}"
+	fi
+
+	# RM_ADDR from the client to server machine
+	:>"$server_evts"
+	ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\
+	   $client_addr_id
+	print_test "RM_ADDR id:${client_addr_id} ns2 => ns1"
+	sleep 0.5
+	verify_remove_event "$server_evts" "$REMOVED" "$server4_token" "$client_addr_id"
+
+	# RM_ADDR from the client to server machine
+	:>"$server_evts"
+	client_addr_id=$(( client_addr_id - 1 ))
+	ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\
+	   $client_addr_id
+	print_test "RM_ADDR id:${client_addr_id} ns2 => ns1"
+	sleep 0.5
+	verify_remove_event "$server_evts" "$REMOVED" "$server4_token" "$client_addr_id"
+
+	# RM_ADDR6 from the client to server machine
+	:>"$server_evts"
+	ip netns exec "$ns2" ./pm_nl_ctl rem token "$client6_token" id\
+	   $client_addr_id
+	print_test "RM_ADDR6 id:${client_addr_id} ns2 => ns1"
+	sleep 0.5
+	verify_remove_event "$server_evts" "$REMOVED" "$server6_token" "$client_addr_id"
+
+	# Capture events on the network namespace running the client
+	:>"$client_evts"
+
+	# RM_ADDR from the server to client machine
+	ip netns exec "$ns1" ./pm_nl_ctl rem token "$server4_token" id\
+	   $server_addr_id
+	print_test "RM_ADDR id:${server_addr_id} ns1 => ns2"
+	sleep 0.5
+	verify_remove_event "$client_evts" "$REMOVED" "$client4_token" "$server_addr_id"
+
+	# RM_ADDR from the server to client machine
+	:>"$client_evts"
+	server_addr_id=$(( server_addr_id - 1 ))
+	ip netns exec "$ns1" ./pm_nl_ctl rem token "$server4_token" id\
+	   $server_addr_id
+	print_test "RM_ADDR id:${server_addr_id} ns1 => ns2"
+	sleep 0.5
+	verify_remove_event "$client_evts" "$REMOVED" "$client4_token" "$server_addr_id"
+
+	# RM_ADDR6 from the server to client machine
+	:>"$client_evts"
+	ip netns exec "$ns1" ./pm_nl_ctl rem token "$server6_token" id\
+	   $server_addr_id
+	print_test "RM_ADDR6 id:${server_addr_id} ns1 => ns2"
+	sleep 0.5
+	verify_remove_event "$client_evts" "$REMOVED" "$client6_token" "$server_addr_id"
+}
+
+verify_subflow_events()
+{
+	local evt=$1
+	local e_type=$2
+	local e_token=$3
+	local e_family=$4
+	local e_saddr=$5
+	local e_daddr=$6
+	local e_dport=$7
+	local e_locid=$8
+	local e_remid=$9
+	shift 2
+	local e_from=$8
+	local e_to=$9
+	local type
+	local token
+	local family
+	local saddr
+	local daddr
+	local dport
+	local locid
+	local remid
+	local info
+
+	info="${e_saddr} (${e_from}) => ${e_daddr}:${e_dport} (${e_to})"
+
+	if [ "$e_type" = "$SUB_ESTABLISHED" ]
+	then
+		if [ "$e_family" = "$AF_INET6" ]
+		then
+			print_test "CREATE_SUBFLOW6 ${info}"
+		else
+			print_test "CREATE_SUBFLOW ${info}"
+		fi
+	else
+		if [ "$e_family" = "$AF_INET6" ]
+		then
+			print_test "DESTROY_SUBFLOW6 ${info}"
+		else
+			print_test "DESTROY_SUBFLOW ${info}"
+		fi
+	fi
+
+	type=$(mptcp_lib_evts_get_info type "$evt" $e_type)
+	token=$(mptcp_lib_evts_get_info token "$evt" $e_type)
+	family=$(mptcp_lib_evts_get_info family "$evt" $e_type)
+	dport=$(mptcp_lib_evts_get_info dport "$evt" $e_type)
+	locid=$(mptcp_lib_evts_get_info loc_id "$evt" $e_type)
+	remid=$(mptcp_lib_evts_get_info rem_id "$evt" $e_type)
+	if [ "$family" = "$AF_INET6" ]
+	then
+		saddr=$(mptcp_lib_evts_get_info saddr6 "$evt" $e_type)
+		daddr=$(mptcp_lib_evts_get_info daddr6 "$evt" $e_type)
+	else
+		saddr=$(mptcp_lib_evts_get_info saddr4 "$evt" $e_type)
+		daddr=$(mptcp_lib_evts_get_info daddr4 "$evt" $e_type)
+	fi
+
+	check_expected "type" "token" "daddr" "dport" "family" "saddr" "locid" "remid"
+}
+
+test_subflows()
+{
+	print_title "Subflows v4 or v6 only tests"
+
+	# Capture events on the network namespace running the server
+	:>"$server_evts"
+
+	# Attempt to add a listener at 10.0.2.2:<subflow-port>
+	ip netns exec "$ns2" ./pm_nl_ctl listen 10.0.2.2\
+	   "$client4_port" &
+	local listener_pid=$!
+
+	# ADD_ADDR from client to server machine reusing the subflow port
+	ip netns exec "$ns2" ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id\
+	   $client_addr_id
+	sleep 0.5
+
+	# CREATE_SUBFLOW from server to client machine
+	:>"$server_evts"
+	ip netns exec "$ns1" ./pm_nl_ctl csf lip 10.0.2.1 lid 23 rip 10.0.2.2\
+	   rport "$client4_port" token "$server4_token"
+	sleep 0.5
+	verify_subflow_events $server_evts $SUB_ESTABLISHED $server4_token $AF_INET "10.0.2.1" \
+			      "10.0.2.2" "$client4_port" "23" "$client_addr_id" "ns1" "ns2"
+
+	# Delete the listener from the client ns, if one was created
+	mptcp_lib_kill_wait $listener_pid
+
+	local sport
+	sport=$(mptcp_lib_evts_get_info sport "$server_evts" $SUB_ESTABLISHED)
+
+	# DESTROY_SUBFLOW from server to client machine
+	:>"$server_evts"
+	ip netns exec "$ns1" ./pm_nl_ctl dsf lip 10.0.2.1 lport "$sport" rip 10.0.2.2 rport\
+	   "$client4_port" token "$server4_token"
+	sleep 0.5
+	verify_subflow_events "$server_evts" "$SUB_CLOSED" "$server4_token" "$AF_INET" "10.0.2.1"\
+			      "10.0.2.2" "$client4_port" "23" "$client_addr_id" "ns1" "ns2"
+
+	# RM_ADDR from client to server machine
+	ip netns exec "$ns2" ./pm_nl_ctl rem id $client_addr_id token\
+	   "$client4_token"
+	sleep 0.5
+
+	# Attempt to add a listener at dead:beef:2::2:<subflow-port>
+	ip netns exec "$ns2" ./pm_nl_ctl listen dead:beef:2::2\
+	   "$client6_port" &
+	listener_pid=$!
+
+	# ADD_ADDR6 from client to server machine reusing the subflow port
+	:>"$server_evts"
+	ip netns exec "$ns2" ./pm_nl_ctl ann dead:beef:2::2 token "$client6_token" id\
+	   $client_addr_id
+	sleep 0.5
+
+	# CREATE_SUBFLOW6 from server to client machine
+	:>"$server_evts"
+	ip netns exec "$ns1" ./pm_nl_ctl csf lip dead:beef:2::1 lid 23 rip\
+	   dead:beef:2::2 rport "$client6_port" token "$server6_token"
+	sleep 0.5
+	verify_subflow_events "$server_evts" "$SUB_ESTABLISHED" "$server6_token" "$AF_INET6"\
+			      "dead:beef:2::1" "dead:beef:2::2" "$client6_port" "23"\
+			      "$client_addr_id" "ns1" "ns2"
+
+	# Delete the listener from the client ns, if one was created
+	mptcp_lib_kill_wait $listener_pid
+
+	sport=$(mptcp_lib_evts_get_info sport "$server_evts" $SUB_ESTABLISHED)
+
+	# DESTROY_SUBFLOW6 from server to client machine
+	:>"$server_evts"
+	ip netns exec "$ns1" ./pm_nl_ctl dsf lip dead:beef:2::1 lport "$sport" rip\
+	   dead:beef:2::2 rport "$client6_port" token "$server6_token"
+	sleep 0.5
+	verify_subflow_events "$server_evts" "$SUB_CLOSED" "$server6_token" "$AF_INET6"\
+			      "dead:beef:2::1" "dead:beef:2::2" "$client6_port" "23"\
+			      "$client_addr_id" "ns1" "ns2"
+
+	# RM_ADDR from client to server machine
+	ip netns exec "$ns2" ./pm_nl_ctl rem id $client_addr_id token\
+	   "$client6_token"
+	sleep 0.5
+
+	# Attempt to add a listener at 10.0.2.2:<new-port>
+	ip netns exec "$ns2" ./pm_nl_ctl listen 10.0.2.2\
+	   $new4_port &
+	listener_pid=$!
+
+	# ADD_ADDR from client to server machine using a new port
+	:>"$server_evts"
+	ip netns exec "$ns2" ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id\
+	   $client_addr_id port $new4_port
+	sleep 0.5
+
+	# CREATE_SUBFLOW from server to client machine
+	:>"$server_evts"
+	ip netns exec "$ns1" ./pm_nl_ctl csf lip 10.0.2.1 lid 23 rip 10.0.2.2 rport\
+	   $new4_port token "$server4_token"
+	sleep 0.5
+	verify_subflow_events "$server_evts" "$SUB_ESTABLISHED" "$server4_token" "$AF_INET"\
+			      "10.0.2.1" "10.0.2.2" "$new4_port" "23"\
+			      "$client_addr_id" "ns1" "ns2"
+
+	# Delete the listener from the client ns, if one was created
+	mptcp_lib_kill_wait $listener_pid
+
+	sport=$(mptcp_lib_evts_get_info sport "$server_evts" $SUB_ESTABLISHED)
+
+	# DESTROY_SUBFLOW from server to client machine
+	:>"$server_evts"
+	ip netns exec "$ns1" ./pm_nl_ctl dsf lip 10.0.2.1 lport "$sport" rip 10.0.2.2 rport\
+	   $new4_port token "$server4_token"
+	sleep 0.5
+	verify_subflow_events "$server_evts" "$SUB_CLOSED" "$server4_token" "$AF_INET" "10.0.2.1"\
+			      "10.0.2.2" "$new4_port" "23" "$client_addr_id" "ns1" "ns2"
+
+	# RM_ADDR from client to server machine
+	ip netns exec "$ns2" ./pm_nl_ctl rem id $client_addr_id token\
+	   "$client4_token"
+
+	# Capture events on the network namespace running the client
+	:>"$client_evts"
+
+	# Attempt to add a listener at 10.0.2.1:<subflow-port>
+	ip netns exec "$ns1" ./pm_nl_ctl listen 10.0.2.1\
+	   $app4_port &
+	listener_pid=$!
+
+	# ADD_ADDR from server to client machine reusing the subflow port
+	ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\
+	   $server_addr_id
+	sleep 0.5
+
+	# CREATE_SUBFLOW from client to server machine
+	:>"$client_evts"
+	ip netns exec "$ns2" ./pm_nl_ctl csf lip 10.0.2.2 lid 23 rip 10.0.2.1 rport\
+	   $app4_port token "$client4_token"
+	sleep 0.5
+	verify_subflow_events $client_evts $SUB_ESTABLISHED $client4_token $AF_INET "10.0.2.2"\
+			      "10.0.2.1" "$app4_port" "23" "$server_addr_id" "ns2" "ns1"
+
+	# Delete the listener from the server ns, if one was created
+	mptcp_lib_kill_wait $listener_pid
+
+	sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED)
+
+	# DESTROY_SUBFLOW from client to server machine
+	:>"$client_evts"
+	ip netns exec "$ns2" ./pm_nl_ctl dsf lip 10.0.2.2 lport "$sport" rip 10.0.2.1 rport\
+	   $app4_port token "$client4_token"
+	sleep 0.5
+	verify_subflow_events "$client_evts" "$SUB_CLOSED" "$client4_token" "$AF_INET" "10.0.2.2"\
+			      "10.0.2.1" "$app4_port" "23" "$server_addr_id" "ns2" "ns1"
+
+	# RM_ADDR from server to client machine
+	ip netns exec "$ns1" ./pm_nl_ctl rem id $server_addr_id token\
+	   "$server4_token"
+	sleep 0.5
+
+	# Attempt to add a listener at dead:beef:2::1:<subflow-port>
+	ip netns exec "$ns1" ./pm_nl_ctl listen dead:beef:2::1\
+	   $app6_port &
+	listener_pid=$!
+
+	# ADD_ADDR6 from server to client machine reusing the subflow port
+	:>"$client_evts"
+	ip netns exec "$ns1" ./pm_nl_ctl ann dead:beef:2::1 token "$server6_token" id\
+	   $server_addr_id
+	sleep 0.5
+
+	# CREATE_SUBFLOW6 from client to server machine
+	:>"$client_evts"
+	ip netns exec "$ns2" ./pm_nl_ctl csf lip dead:beef:2::2 lid 23 rip\
+	   dead:beef:2::1 rport $app6_port token "$client6_token"
+	sleep 0.5
+	verify_subflow_events "$client_evts" "$SUB_ESTABLISHED" "$client6_token"\
+			      "$AF_INET6" "dead:beef:2::2"\
+			      "dead:beef:2::1" "$app6_port" "23"\
+			      "$server_addr_id" "ns2" "ns1"
+
+	# Delete the listener from the server ns, if one was created
+	mptcp_lib_kill_wait $listener_pid
+
+	sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED)
+
+	# DESTROY_SUBFLOW6 from client to server machine
+	:>"$client_evts"
+	ip netns exec "$ns2" ./pm_nl_ctl dsf lip dead:beef:2::2 lport "$sport" rip\
+	   dead:beef:2::1 rport $app6_port token "$client6_token"
+	sleep 0.5
+	verify_subflow_events $client_evts $SUB_CLOSED $client6_token $AF_INET6 "dead:beef:2::2"\
+			      "dead:beef:2::1" "$app6_port" "23" "$server_addr_id" "ns2" "ns1"
+
+	# RM_ADDR6 from server to client machine
+	ip netns exec "$ns1" ./pm_nl_ctl rem id $server_addr_id token\
+	   "$server6_token"
+	sleep 0.5
+
+	# Attempt to add a listener at 10.0.2.1:<new-port>
+	ip netns exec "$ns1" ./pm_nl_ctl listen 10.0.2.1\
+	   $new4_port &
+	listener_pid=$!
+
+	# ADD_ADDR from server to client machine using a new port
+	:>"$client_evts"
+	ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\
+	   $server_addr_id port $new4_port
+	sleep 0.5
+
+	# CREATE_SUBFLOW from client to server machine
+	:>"$client_evts"
+	ip netns exec "$ns2" ./pm_nl_ctl csf lip 10.0.2.2 lid 23 rip 10.0.2.1 rport\
+	   $new4_port token "$client4_token"
+	sleep 0.5
+	verify_subflow_events "$client_evts" "$SUB_ESTABLISHED" "$client4_token" "$AF_INET"\
+			      "10.0.2.2" "10.0.2.1" "$new4_port" "23" "$server_addr_id" "ns2" "ns1"
+
+	# Delete the listener from the server ns, if one was created
+	mptcp_lib_kill_wait $listener_pid
+
+	sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED)
+
+	# DESTROY_SUBFLOW from client to server machine
+	:>"$client_evts"
+	ip netns exec "$ns2" ./pm_nl_ctl dsf lip 10.0.2.2 lport "$sport" rip 10.0.2.1 rport\
+	   $new4_port token "$client4_token"
+	sleep 0.5
+	verify_subflow_events "$client_evts" "$SUB_CLOSED" "$client4_token" "$AF_INET" "10.0.2.2"\
+			      "10.0.2.1" "$new4_port" "23" "$server_addr_id" "ns2" "ns1"
+
+	# RM_ADDR from server to client machine
+	ip netns exec "$ns1" ./pm_nl_ctl rem id $server_addr_id token\
+	   "$server4_token"
+}
+
+test_subflows_v4_v6_mix()
+{
+	print_title "Subflows v4 and v6 mix tests"
+
+	# Attempt to add a listener at 10.0.2.1:<subflow-port>
+	ip netns exec "$ns1" ./pm_nl_ctl listen 10.0.2.1\
+	   $app6_port &
+	local listener_pid=$!
+
+	# ADD_ADDR4 from server to client machine reusing the subflow port on
+	# the established v6 connection
+	:>"$client_evts"
+	ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server6_token" id\
+	   $server_addr_id dev ns1eth2
+	print_test "ADD_ADDR4 id:${server_addr_id} 10.0.2.1 (ns1) => ns2, reuse port"
+	sleep 0.5
+	verify_announce_event "$client_evts" "$ANNOUNCED" "$client6_token" "10.0.2.1"\
+			      "$server_addr_id" "$app6_port"
+
+	# CREATE_SUBFLOW from client to server machine
+	:>"$client_evts"
+	ip netns exec "$ns2" ./pm_nl_ctl csf lip 10.0.2.2 lid 23 rip 10.0.2.1 rport\
+	   $app6_port token "$client6_token"
+	sleep 0.5
+	verify_subflow_events "$client_evts" "$SUB_ESTABLISHED" "$client6_token"\
+			      "$AF_INET" "10.0.2.2" "10.0.2.1" "$app6_port" "23"\
+			      "$server_addr_id" "ns2" "ns1"
+
+	# Delete the listener from the server ns, if one was created
+	mptcp_lib_kill_wait $listener_pid
+
+	sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED)
+
+	# DESTROY_SUBFLOW from client to server machine
+	:>"$client_evts"
+	ip netns exec "$ns2" ./pm_nl_ctl dsf lip 10.0.2.2 lport "$sport" rip 10.0.2.1 rport\
+	   $app6_port token "$client6_token"
+	sleep 0.5
+	verify_subflow_events "$client_evts" "$SUB_CLOSED" "$client6_token" \
+			      "$AF_INET" "10.0.2.2" "10.0.2.1" "$app6_port" "23"\
+			      "$server_addr_id" "ns2" "ns1"
+
+	# RM_ADDR from server to client machine
+	ip netns exec "$ns1" ./pm_nl_ctl rem id $server_addr_id token\
+	   "$server6_token"
+	sleep 0.5
+}
+
+test_prio()
+{
+	print_title "Prio tests"
+
+	local count
+
+	# Send MP_PRIO signal from client to server machine
+	ip netns exec "$ns2" ./pm_nl_ctl set 10.0.1.2 port "$client4_port" flags backup token "$client4_token" rip 10.0.1.1 rport "$app4_port"
+	sleep 0.5
+
+	# Check TX
+	print_test "MP_PRIO TX"
+	count=$(mptcp_lib_get_counter "$ns2" "MPTcpExtMPPrioTx")
+	if [ -z "$count" ]; then
+		test_skip
+	elif [ $count != 1 ]; then
+		test_fail "Count != 1: ${count}"
+	else
+		test_pass
+	fi
+
+	# Check RX
+	print_test "MP_PRIO RX"
+	count=$(mptcp_lib_get_counter "$ns1" "MPTcpExtMPPrioRx")
+	if [ -z "$count" ]; then
+		test_skip
+	elif [ $count != 1 ]; then
+		test_fail "Count != 1: ${count}"
+	else
+		test_pass
+	fi
+}
+
+verify_listener_events()
+{
+	if mptcp_lib_verify_listener_events "${@}"; then
+		test_pass
+	else
+		test_fail
+	fi
+}
+
+test_listener()
+{
+	print_title "Listener tests"
+
+	if ! mptcp_lib_kallsyms_has "mptcp_event_pm_listener$"; then
+		print_test "LISTENER events"
+		test_skip
+		return
+	fi
+
+	# Capture events on the network namespace running the client
+	:>$client_evts
+
+	# Attempt to add a listener at 10.0.2.2:<subflow-port>
+	ip netns exec $ns2 ./pm_nl_ctl listen 10.0.2.2\
+		$client4_port &
+	local listener_pid=$!
+
+	sleep 0.5
+	print_test "CREATE_LISTENER 10.0.2.2:$client4_port"
+	verify_listener_events $client_evts $LISTENER_CREATED $AF_INET 10.0.2.2 $client4_port
+
+	# ADD_ADDR from client to server machine reusing the subflow port
+	ip netns exec $ns2 ./pm_nl_ctl ann 10.0.2.2 token $client4_token id\
+		$client_addr_id
+	sleep 0.5
+
+	# CREATE_SUBFLOW from server to client machine
+	ip netns exec $ns1 ./pm_nl_ctl csf lip 10.0.2.1 lid 23 rip 10.0.2.2\
+		rport $client4_port token $server4_token
+	sleep 0.5
+
+	# Delete the listener from the client ns, if one was created
+	mptcp_lib_kill_wait $listener_pid
+
+	sleep 0.5
+	print_test "CLOSE_LISTENER 10.0.2.2:$client4_port"
+	verify_listener_events $client_evts $LISTENER_CLOSED $AF_INET 10.0.2.2 $client4_port
+}
+
+print_title "Make connections"
+make_connection
+make_connection "v6"
+
+test_announce
+test_remove
+test_subflows
+test_subflows_v4_v6_mix
+test_prio
+test_listener
+
+mptcp_lib_result_print_all_tap
+exit ${ret}
diff --git a/tools/testing/selftests/net/msg_zerocopy.sh b/tools/testing/selftests/net/msg_zerocopy.sh
index 825ffec85cea..89c22f5320e0 100755
--- a/tools/testing/selftests/net/msg_zerocopy.sh
+++ b/tools/testing/selftests/net/msg_zerocopy.sh
@@ -70,23 +70,22 @@ case "${TXMODE}" in
 esac
 
 # Start of state changes: install cleanup handler
-save_sysctl_mem="$(sysctl -n ${path_sysctl_mem})"
 
 cleanup() {
 	ip netns del "${NS2}"
 	ip netns del "${NS1}"
-	sysctl -w -q "${path_sysctl_mem}=${save_sysctl_mem}"
 }
 
 trap cleanup EXIT
 
-# Configure system settings
-sysctl -w -q "${path_sysctl_mem}=1000000"
-
 # Create virtual ethernet pair between network namespaces
 ip netns add "${NS1}"
 ip netns add "${NS2}"
 
+# Configure system settings
+ip netns exec "${NS1}" sysctl -w -q "${path_sysctl_mem}=1000000"
+ip netns exec "${NS2}" sysctl -w -q "${path_sysctl_mem}=1000000"
+
 ip link add "${DEV}" mtu "${DEV_MTU}" netns "${NS1}" type veth \
   peer name "${DEV}" mtu "${DEV_MTU}" netns "${NS2}"
 
diff --git a/tools/testing/selftests/net/nat6to4.c b/tools/testing/selftests/net/nat6to4.c
new file mode 100644
index 000000000000..ac54c36b25fc
--- /dev/null
+++ b/tools/testing/selftests/net/nat6to4.c
@@ -0,0 +1,285 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * This code is taken from the Android Open Source Project and the author
+ * (Maciej Żenczykowski) has gave permission to relicense it under the
+ * GPLv2. Therefore this program is free software;
+ * You can redistribute it and/or modify it under the terms of the GNU
+ * General Public License version 2 as published by the Free Software
+ * Foundation
+
+ * The original headers, including the original license headers, are
+ * included below for completeness.
+ *
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <linux/bpf.h>
+#include <linux/if.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/pkt_cls.h>
+#include <linux/swab.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+
+#include <linux/udp.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define IP_DF 0x4000  // Flag: "Don't Fragment"
+
+SEC("schedcls/ingress6/nat_6")
+int sched_cls_ingress6_nat_6_prog(struct __sk_buff *skb)
+{
+	const int l2_header_size =  sizeof(struct ethhdr);
+	void *data = (void *)(long)skb->data;
+	const void *data_end = (void *)(long)skb->data_end;
+	const struct ethhdr * const eth = data;  // used iff is_ethernet
+	const struct ipv6hdr * const ip6 =  (void *)(eth + 1);
+
+	// Require ethernet dst mac address to be our unicast address.
+	if  (skb->pkt_type != PACKET_HOST)
+		return TC_ACT_OK;
+
+	// Must be meta-ethernet IPv6 frame
+	if (skb->protocol != bpf_htons(ETH_P_IPV6))
+		return TC_ACT_OK;
+
+	// Must have (ethernet and) ipv6 header
+	if (data + l2_header_size + sizeof(*ip6) > data_end)
+		return TC_ACT_OK;
+
+	// Ethertype - if present - must be IPv6
+	if (eth->h_proto != bpf_htons(ETH_P_IPV6))
+		return TC_ACT_OK;
+
+	// IP version must be 6
+	if (ip6->version != 6)
+		return TC_ACT_OK;
+	// Maximum IPv6 payload length that can be translated to IPv4
+	if (bpf_ntohs(ip6->payload_len) > 0xFFFF - sizeof(struct iphdr))
+		return TC_ACT_OK;
+	switch (ip6->nexthdr) {
+	case IPPROTO_TCP:  // For TCP & UDP the checksum neutrality of the chosen IPv6
+	case IPPROTO_UDP:  // address means there is no need to update their checksums.
+	case IPPROTO_GRE:  // We do not need to bother looking at GRE/ESP headers,
+	case IPPROTO_ESP:  // since there is never a checksum to update.
+		break;
+	default:  // do not know how to handle anything else
+		return TC_ACT_OK;
+	}
+
+	struct ethhdr eth2;  // used iff is_ethernet
+
+	eth2 = *eth;                     // Copy over the ethernet header (src/dst mac)
+	eth2.h_proto = bpf_htons(ETH_P_IP);  // But replace the ethertype
+
+	struct iphdr ip = {
+		.version = 4,                                                      // u4
+		.ihl = sizeof(struct iphdr) / sizeof(__u32),                       // u4
+		.tos = (ip6->priority << 4) + (ip6->flow_lbl[0] >> 4),             // u8
+		.tot_len = bpf_htons(bpf_ntohs(ip6->payload_len) + sizeof(struct iphdr)),  // u16
+		.id = 0,                                                           // u16
+		.frag_off = bpf_htons(IP_DF),                                          // u16
+		.ttl = ip6->hop_limit,                                             // u8
+		.protocol = ip6->nexthdr,                                          // u8
+		.check = 0,                                                        // u16
+		.saddr = 0x0201a8c0,                            // u32
+		.daddr = 0x0101a8c0,                                         // u32
+	};
+
+	// Calculate the IPv4 one's complement checksum of the IPv4 header.
+	__wsum sum4 = 0;
+
+	for (int i = 0; i < sizeof(ip) / sizeof(__u16); ++i)
+		sum4 += ((__u16 *)&ip)[i];
+
+	// Note that sum4 is guaranteed to be non-zero by virtue of ip.version == 4
+	sum4 = (sum4 & 0xFFFF) + (sum4 >> 16);  // collapse u32 into range 1 .. 0x1FFFE
+	sum4 = (sum4 & 0xFFFF) + (sum4 >> 16);  // collapse any potential carry into u16
+	ip.check = (__u16)~sum4;                // sum4 cannot be zero, so this is never 0xFFFF
+
+	// Calculate the *negative* IPv6 16-bit one's complement checksum of the IPv6 header.
+	__wsum sum6 = 0;
+	// We'll end up with a non-zero sum due to ip6->version == 6 (which has '0' bits)
+	for (int i = 0; i < sizeof(*ip6) / sizeof(__u16); ++i)
+		sum6 += ~((__u16 *)ip6)[i];  // note the bitwise negation
+
+	// Note that there is no L4 checksum update: we are relying on the checksum neutrality
+	// of the ipv6 address chosen by netd's ClatdController.
+
+	// Packet mutations begin - point of no return, but if this first modification fails
+	// the packet is probably still pristine, so let clatd handle it.
+	if (bpf_skb_change_proto(skb, bpf_htons(ETH_P_IP), 0))
+		return TC_ACT_OK;
+	bpf_csum_update(skb, sum6);
+
+	data = (void *)(long)skb->data;
+	data_end = (void *)(long)skb->data_end;
+	if (data + l2_header_size + sizeof(struct iphdr) > data_end)
+		return TC_ACT_SHOT;
+
+	struct ethhdr *new_eth = data;
+
+	// Copy over the updated ethernet header
+	*new_eth = eth2;
+
+	// Copy over the new ipv4 header.
+	*(struct iphdr *)(new_eth + 1) = ip;
+	return bpf_redirect(skb->ifindex, BPF_F_INGRESS);
+}
+
+SEC("schedcls/egress4/snat4")
+int sched_cls_egress4_snat4_prog(struct __sk_buff *skb)
+{
+	const int l2_header_size =  sizeof(struct ethhdr);
+	void *data = (void *)(long)skb->data;
+	const void *data_end = (void *)(long)skb->data_end;
+	const struct ethhdr *const eth = data;  // used iff is_ethernet
+	const struct iphdr *const ip4 = (void *)(eth + 1);
+
+	// Must be meta-ethernet IPv4 frame
+	if (skb->protocol != bpf_htons(ETH_P_IP))
+		return TC_ACT_OK;
+
+	// Must have ipv4 header
+	if (data + l2_header_size + sizeof(struct ipv6hdr) > data_end)
+		return TC_ACT_OK;
+
+	// Ethertype - if present - must be IPv4
+	if (eth->h_proto != bpf_htons(ETH_P_IP))
+		return TC_ACT_OK;
+
+	// IP version must be 4
+	if (ip4->version != 4)
+		return TC_ACT_OK;
+
+	// We cannot handle IP options, just standard 20 byte == 5 dword minimal IPv4 header
+	if (ip4->ihl != 5)
+		return TC_ACT_OK;
+
+	// Maximum IPv6 payload length that can be translated to IPv4
+	if (bpf_htons(ip4->tot_len) > 0xFFFF - sizeof(struct ipv6hdr))
+		return TC_ACT_OK;
+
+	// Calculate the IPv4 one's complement checksum of the IPv4 header.
+	__wsum sum4 = 0;
+
+	for (int i = 0; i < sizeof(*ip4) / sizeof(__u16); ++i)
+		sum4 += ((__u16 *)ip4)[i];
+
+	// Note that sum4 is guaranteed to be non-zero by virtue of ip4->version == 4
+	sum4 = (sum4 & 0xFFFF) + (sum4 >> 16);  // collapse u32 into range 1 .. 0x1FFFE
+	sum4 = (sum4 & 0xFFFF) + (sum4 >> 16);  // collapse any potential carry into u16
+	// for a correct checksum we should get *a* zero, but sum4 must be positive, ie 0xFFFF
+	if (sum4 != 0xFFFF)
+		return TC_ACT_OK;
+
+	// Minimum IPv4 total length is the size of the header
+	if (bpf_ntohs(ip4->tot_len) < sizeof(*ip4))
+		return TC_ACT_OK;
+
+	// We are incapable of dealing with IPv4 fragments
+	if (ip4->frag_off & ~bpf_htons(IP_DF))
+		return TC_ACT_OK;
+
+	switch (ip4->protocol) {
+	case IPPROTO_TCP:  // For TCP & UDP the checksum neutrality of the chosen IPv6
+	case IPPROTO_GRE:  // address means there is no need to update their checksums.
+	case IPPROTO_ESP:  // We do not need to bother looking at GRE/ESP headers,
+		break;         // since there is never a checksum to update.
+
+	case IPPROTO_UDP:  // See above comment, but must also have UDP header...
+		if (data + sizeof(*ip4) + sizeof(struct udphdr) > data_end)
+			return TC_ACT_OK;
+		const struct udphdr *uh = (const struct udphdr *)(ip4 + 1);
+		// If IPv4/UDP checksum is 0 then fallback to clatd so it can calculate the
+		// checksum.  Otherwise the network or more likely the NAT64 gateway might
+		// drop the packet because in most cases IPv6/UDP packets with a zero checksum
+		// are invalid. See RFC 6935.  TODO: calculate checksum via bpf_csum_diff()
+		if (!uh->check)
+			return TC_ACT_OK;
+		break;
+
+	default:  // do not know how to handle anything else
+		return TC_ACT_OK;
+	}
+	struct ethhdr eth2;  // used iff is_ethernet
+
+	eth2 = *eth;                     // Copy over the ethernet header (src/dst mac)
+	eth2.h_proto = bpf_htons(ETH_P_IPV6);  // But replace the ethertype
+
+	struct ipv6hdr ip6 = {
+		.version = 6,                                    // __u8:4
+		.priority = ip4->tos >> 4,                       // __u8:4
+		.flow_lbl = {(ip4->tos & 0xF) << 4, 0, 0},       // __u8[3]
+		.payload_len = bpf_htons(bpf_ntohs(ip4->tot_len) - 20),  // __be16
+		.nexthdr = ip4->protocol,                        // __u8
+		.hop_limit = ip4->ttl,                           // __u8
+	};
+	ip6.saddr.in6_u.u6_addr32[0] = bpf_htonl(0x20010db8);
+	ip6.saddr.in6_u.u6_addr32[1] = 0;
+	ip6.saddr.in6_u.u6_addr32[2] = 0;
+	ip6.saddr.in6_u.u6_addr32[3] = bpf_htonl(1);
+	ip6.daddr.in6_u.u6_addr32[0] = bpf_htonl(0x20010db8);
+	ip6.daddr.in6_u.u6_addr32[1] = 0;
+	ip6.daddr.in6_u.u6_addr32[2] = 0;
+	ip6.daddr.in6_u.u6_addr32[3] = bpf_htonl(2);
+
+	// Calculate the IPv6 16-bit one's complement checksum of the IPv6 header.
+	__wsum sum6 = 0;
+	// We'll end up with a non-zero sum due to ip6.version == 6
+	for (int i = 0; i < sizeof(ip6) / sizeof(__u16); ++i)
+		sum6 += ((__u16 *)&ip6)[i];
+
+	// Packet mutations begin - point of no return, but if this first modification fails
+	// the packet is probably still pristine, so let clatd handle it.
+	if (bpf_skb_change_proto(skb, bpf_htons(ETH_P_IPV6), 0))
+		return TC_ACT_OK;
+
+	// This takes care of updating the skb->csum field for a CHECKSUM_COMPLETE packet.
+	// In such a case, skb->csum is a 16-bit one's complement sum of the entire payload,
+	// thus we need to subtract out the ipv4 header's sum, and add in the ipv6 header's sum.
+	// However, we've already verified the ipv4 checksum is correct and thus 0.
+	// Thus we only need to add the ipv6 header's sum.
+	//
+	// bpf_csum_update() always succeeds if the skb is CHECKSUM_COMPLETE and returns an error
+	// (-ENOTSUPP) if it isn't.  So we just ignore the return code (see above for more details).
+	bpf_csum_update(skb, sum6);
+
+	// bpf_skb_change_proto() invalidates all pointers - reload them.
+	data = (void *)(long)skb->data;
+	data_end = (void *)(long)skb->data_end;
+
+	// I cannot think of any valid way for this error condition to trigger, however I do
+	// believe the explicit check is required to keep the in kernel ebpf verifier happy.
+	if (data + l2_header_size + sizeof(ip6) > data_end)
+		return TC_ACT_SHOT;
+
+	struct ethhdr *new_eth = data;
+
+	// Copy over the updated ethernet header
+	*new_eth = eth2;
+	// Copy over the new ipv4 header.
+	*(struct ipv6hdr *)(new_eth + 1) = ip6;
+	return TC_ACT_OK;
+}
+
+char _license[] SEC("license") = ("GPL");
diff --git a/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh b/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh
new file mode 100755
index 000000000000..5db69dad0cfc
--- /dev/null
+++ b/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh
@@ -0,0 +1,249 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is for the accept_untracked_na feature to
+# enable RFC9131 behaviour. The following is the test-matrix.
+# drop   accept  fwding                   behaviour
+# ----   ------  ------  ----------------------------------------------
+#    1        X       X  Don't update NC
+#    0        0       X  Don't update NC
+#    0        1       0  Don't update NC
+#    0        1       1  Add a STALE NC entry
+
+source lib.sh
+ret=0
+
+PAUSE_ON_FAIL=no
+PAUSE=no
+
+HOST_INTF="veth-host"
+ROUTER_INTF="veth-router"
+
+ROUTER_ADDR="2000:20::1"
+HOST_ADDR="2000:20::2"
+SUBNET_WIDTH=64
+ROUTER_ADDR_WITH_MASK="${ROUTER_ADDR}/${SUBNET_WIDTH}"
+HOST_ADDR_WITH_MASK="${HOST_ADDR}/${SUBNET_WIDTH}"
+
+tcpdump_stdout=
+tcpdump_stderr=
+
+log_test()
+{
+	local rc=$1
+	local expected=$2
+	local msg="$3"
+
+	if [ ${rc} -eq ${expected} ]; then
+		printf "    TEST: %-60s  [ OK ]\n" "${msg}"
+		nsuccess=$((nsuccess+1))
+	else
+		ret=1
+		nfail=$((nfail+1))
+		printf "    TEST: %-60s  [FAIL]\n" "${msg}"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+		echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	fi
+
+	if [ "${PAUSE}" = "yes" ]; then
+		echo
+		echo "hit enter to continue, 'q' to quit"
+		read a
+		[ "$a" = "q" ] && exit 1
+	fi
+}
+
+setup()
+{
+	set -e
+
+	local drop_unsolicited_na=$1
+	local accept_untracked_na=$2
+	local forwarding=$3
+
+	# Setup two namespaces and a veth tunnel across them.
+	# On end of the tunnel is a router and the other end is a host.
+	setup_ns HOST_NS ROUTER_NS
+	IP_HOST="ip -6 -netns ${HOST_NS}"
+	IP_HOST_EXEC="ip netns exec ${HOST_NS}"
+	IP_ROUTER="ip -6 -netns ${ROUTER_NS}"
+	IP_ROUTER_EXEC="ip netns exec ${ROUTER_NS}"
+
+	${IP_ROUTER} link add ${ROUTER_INTF} type veth \
+                peer name ${HOST_INTF} netns ${HOST_NS}
+
+	# Enable IPv6 on both router and host, and configure static addresses.
+	# The router here is the DUT
+	# Setup router configuration as specified by the arguments.
+	# forwarding=0 case is to check that a non-router
+	# doesn't add neighbour entries.
+        ROUTER_CONF=net.ipv6.conf.${ROUTER_INTF}
+	${IP_ROUTER_EXEC} sysctl -qw \
+                ${ROUTER_CONF}.forwarding=${forwarding}
+	${IP_ROUTER_EXEC} sysctl -qw \
+                ${ROUTER_CONF}.drop_unsolicited_na=${drop_unsolicited_na}
+	${IP_ROUTER_EXEC} sysctl -qw \
+                ${ROUTER_CONF}.accept_untracked_na=${accept_untracked_na}
+	${IP_ROUTER_EXEC} sysctl -qw ${ROUTER_CONF}.disable_ipv6=0
+	${IP_ROUTER} addr add ${ROUTER_ADDR_WITH_MASK} dev ${ROUTER_INTF}
+
+	# Turn on ndisc_notify on host interface so that
+	# the host sends unsolicited NAs.
+	HOST_CONF=net.ipv6.conf.${HOST_INTF}
+	${IP_HOST_EXEC} sysctl -qw ${HOST_CONF}.ndisc_notify=1
+	${IP_HOST_EXEC} sysctl -qw ${HOST_CONF}.disable_ipv6=0
+	${IP_HOST} addr add ${HOST_ADDR_WITH_MASK} dev ${HOST_INTF}
+
+	set +e
+}
+
+start_tcpdump() {
+	set -e
+	tcpdump_stdout=`mktemp`
+	tcpdump_stderr=`mktemp`
+	${IP_ROUTER_EXEC} timeout 15s \
+                tcpdump --immediate-mode -tpni ${ROUTER_INTF} -c 1 \
+                "icmp6 && icmp6[0] == 136 && src ${HOST_ADDR}" \
+                > ${tcpdump_stdout} 2> /dev/null
+	set +e
+}
+
+cleanup_tcpdump()
+{
+	set -e
+	[[ ! -z  ${tcpdump_stdout} ]] && rm -f ${tcpdump_stdout}
+	[[ ! -z  ${tcpdump_stderr} ]] && rm -f ${tcpdump_stderr}
+	tcpdump_stdout=
+	tcpdump_stderr=
+	set +e
+}
+
+cleanup()
+{
+	cleanup_tcpdump
+	ip netns del ${HOST_NS}
+	ip netns del ${ROUTER_NS}
+}
+
+link_up() {
+	set -e
+	${IP_ROUTER} link set dev ${ROUTER_INTF} up
+	${IP_HOST} link set dev ${HOST_INTF} up
+	set +e
+}
+
+verify_ndisc() {
+	local drop_unsolicited_na=$1
+	local accept_untracked_na=$2
+	local forwarding=$3
+
+	neigh_show_output=$(${IP_ROUTER} neigh show \
+                to ${HOST_ADDR} dev ${ROUTER_INTF} nud stale)
+	if [ ${drop_unsolicited_na} -eq 0 ] && \
+			[ ${accept_untracked_na} -eq 1 ] && \
+			[ ${forwarding} -eq 1 ]; then
+		# Neighbour entry expected to be present for 011 case
+		[[ ${neigh_show_output} ]]
+	else
+		# Neighbour entry expected to be absent for all other cases
+		[[ -z ${neigh_show_output} ]]
+	fi
+}
+
+test_unsolicited_na_common()
+{
+	# Setup the test bed, but keep links down
+	setup $1 $2 $3
+
+	# Bring the link up, wait for the NA,
+	# and add a delay to ensure neighbour processing is done.
+	link_up
+	start_tcpdump
+
+	# Verify the neighbour table
+	verify_ndisc $1 $2 $3
+
+}
+
+test_unsolicited_na_combination() {
+	test_unsolicited_na_common $1 $2 $3
+	test_msg=("test_unsolicited_na: "
+		"drop_unsolicited_na=$1 "
+		"accept_untracked_na=$2 "
+		"forwarding=$3")
+	log_test $? 0 "${test_msg[*]}"
+	cleanup
+}
+
+test_unsolicited_na_combinations() {
+	# Args: drop_unsolicited_na accept_untracked_na forwarding
+
+	# Expect entry
+	test_unsolicited_na_combination 0 1 1
+
+	# Expect no entry
+	test_unsolicited_na_combination 0 0 0
+	test_unsolicited_na_combination 0 0 1
+	test_unsolicited_na_combination 0 1 0
+	test_unsolicited_na_combination 1 0 0
+	test_unsolicited_na_combination 1 0 1
+	test_unsolicited_na_combination 1 1 0
+	test_unsolicited_na_combination 1 1 1
+}
+
+###############################################################################
+# usage
+
+usage()
+{
+	cat <<EOF
+usage: ${0##*/} OPTS
+        -p          Pause on fail
+        -P          Pause after each test before cleanup
+EOF
+}
+
+###############################################################################
+# main
+
+while getopts :pPh o
+do
+	case $o in
+		p) PAUSE_ON_FAIL=yes;;
+		P) PAUSE=yes;;
+		h) usage; exit 0;;
+		*) usage; exit 1;;
+	esac
+done
+
+# make sure we don't pause twice
+[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no
+
+if [ "$(id -u)" -ne 0 ];then
+	echo "SKIP: Need root privileges"
+	exit $ksft_skip;
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v tcpdump)" ]; then
+	echo "SKIP: Could not run test without tcpdump tool"
+	exit $ksft_skip
+fi
+
+# start clean
+cleanup &> /dev/null
+
+test_unsolicited_na_combinations
+
+printf "\nTests passed: %3d\n" ${nsuccess}
+printf "Tests failed: %3d\n"   ${nfail}
+
+exit $ret
diff --git a/tools/testing/selftests/net/net_helper.sh b/tools/testing/selftests/net/net_helper.sh
new file mode 100644
index 000000000000..6596fe03c77f
--- /dev/null
+++ b/tools/testing/selftests/net/net_helper.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Helper functions
+
+wait_local_port_listen()
+{
+	local listener_ns="${1}"
+	local port="${2}"
+	local protocol="${3}"
+	local pattern
+	local i
+
+	pattern=":$(printf "%04X" "${port}") "
+
+	# for tcp protocol additionally check the socket state
+	[ ${protocol} = "tcp" ] && pattern="${pattern}0A"
+	for i in $(seq 10); do
+		if ip netns exec "${listener_ns}" awk '{print $2" "$4}' \
+		   /proc/net/"${protocol}"* | grep -q "${pattern}"; then
+			break
+		fi
+		sleep 0.1
+	done
+}
diff --git a/tools/testing/selftests/net/netns-name.sh b/tools/testing/selftests/net/netns-name.sh
new file mode 100755
index 000000000000..6974474c26f3
--- /dev/null
+++ b/tools/testing/selftests/net/netns-name.sh
@@ -0,0 +1,87 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+set -o pipefail
+
+DEV=dummy-dev0
+DEV2=dummy-dev1
+ALT_NAME=some-alt-name
+
+RET_CODE=0
+
+cleanup() {
+    cleanup_ns $NS $test_ns
+}
+
+trap cleanup EXIT
+
+fail() {
+    echo "ERROR: ${1:-unexpected return code} (ret: $_)" >&2
+    RET_CODE=1
+}
+
+setup_ns NS test_ns
+
+#
+# Test basic move without a rename
+#
+ip -netns $NS link add name $DEV type dummy || fail
+ip -netns $NS link set dev $DEV netns $test_ns ||
+    fail "Can't perform a netns move"
+ip -netns $test_ns link show dev $DEV >> /dev/null || fail "Device not found after move"
+ip -netns $test_ns link del $DEV || fail
+
+#
+# Test move with a conflict
+#
+ip -netns $test_ns link add name $DEV type dummy
+ip -netns $NS link add name $DEV type dummy || fail
+ip -netns $NS link set dev $DEV netns $test_ns 2> /dev/null &&
+    fail "Performed a netns move with a name conflict"
+ip -netns $test_ns link show dev $DEV >> /dev/null || fail "Device not found after move"
+ip -netns $NS link del $DEV || fail
+ip -netns $test_ns link del $DEV || fail
+
+#
+# Test move with a conflict and rename
+#
+ip -netns $test_ns link add name $DEV type dummy
+ip -netns $NS link add name $DEV type dummy || fail
+ip -netns $NS link set dev $DEV netns $test_ns name $DEV2 ||
+    fail "Can't perform a netns move with rename"
+ip -netns $test_ns link del $DEV2 || fail
+ip -netns $test_ns link del $DEV || fail
+
+#
+# Test dup alt-name with netns move
+#
+ip -netns $test_ns link add name $DEV type dummy || fail
+ip -netns $test_ns link property add dev $DEV altname $ALT_NAME || fail
+ip -netns $NS link add name $DEV2 type dummy || fail
+ip -netns $NS link property add dev $DEV2 altname $ALT_NAME || fail
+
+ip -netns $NS link set dev $DEV2 netns $test_ns 2> /dev/null &&
+    fail "Moved with alt-name dup"
+
+ip -netns $test_ns link del $DEV || fail
+ip -netns $NS link del $DEV2 || fail
+
+#
+# Test creating alt-name in one net-ns and using in another
+#
+ip -netns $NS link add name $DEV type dummy || fail
+ip -netns $NS link property add dev $DEV altname $ALT_NAME || fail
+ip -netns $NS link set dev $DEV netns $test_ns || fail
+ip -netns $test_ns link show dev $ALT_NAME >> /dev/null || fail "Can't find alt-name after move"
+ip -netns $NS link show dev $ALT_NAME 2> /dev/null &&
+    fail "Can still find alt-name after move"
+ip -netns $test_ns link del $DEV || fail
+
+echo -ne "$(basename $0) \t\t\t\t"
+if [ $RET_CODE -eq 0 ]; then
+    echo "[  OK  ]"
+else
+    echo "[ FAIL ]"
+fi
+exit $RET_CODE
diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c
index f75c53ce0a2d..cd8a58097448 100644
--- a/tools/testing/selftests/net/nettest.c
+++ b/tools/testing/selftests/net/nettest.c
@@ -9,14 +9,18 @@
 #include <sys/types.h>
 #include <sys/ioctl.h>
 #include <sys/socket.h>
+#include <sys/wait.h>
 #include <linux/tcp.h>
+#include <linux/udp.h>
 #include <arpa/inet.h>
 #include <net/if.h>
 #include <netinet/in.h>
+#include <netinet/ip.h>
 #include <netdb.h>
 #include <fcntl.h>
 #include <libgen.h>
 #include <limits.h>
+#include <sched.h>
 #include <stdarg.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -24,6 +28,11 @@
 #include <unistd.h>
 #include <time.h>
 #include <errno.h>
+#include <getopt.h>
+
+#include <linux/xfrm.h>
+#include <linux/ipsec.h>
+#include <linux/pfkeyv2.h>
 
 #ifndef IPV6_UNICAST_IF
 #define IPV6_UNICAST_IF         76
@@ -34,6 +43,8 @@
 
 #define DEFAULT_PORT 12345
 
+#define NS_PREFIX "/run/netns/"
+
 #ifndef MAX
 #define MAX(a, b)  ((a) > (b) ? (a) : (b))
 #endif
@@ -43,12 +54,15 @@
 
 struct sock_args {
 	/* local address */
+	const char *local_addr_str;
+	const char *client_local_addr_str;
 	union {
 		struct in_addr  in;
 		struct in6_addr in6;
 	} local_addr;
 
 	/* remote address */
+	const char *remote_addr_str;
 	union {
 		struct in_addr  in;
 		struct in6_addr in6;
@@ -62,7 +76,9 @@ struct sock_args {
 		     has_grp:1,
 		     has_expected_laddr:1,
 		     has_expected_raddr:1,
-		     bind_test_only:1;
+		     bind_test_only:1,
+		     client_dontroute:1,
+		     server_dontroute:1;
 
 	unsigned short port;
 
@@ -71,32 +87,52 @@ struct sock_args {
 	int version;   /* AF_INET/AF_INET6 */
 
 	int use_setsockopt;
+	int use_freebind;
 	int use_cmsg;
+	uint8_t dsfield;
 	const char *dev;
+	const char *server_dev;
 	int ifindex;
 
+	const char *clientns;
+	const char *serverns;
+
 	const char *password;
+	const char *client_pw;
 	/* prefix for MD5 password */
+	const char *md5_prefix_str;
 	union {
 		struct sockaddr_in v4;
 		struct sockaddr_in6 v6;
 	} md5_prefix;
 	unsigned int prefix_len;
+	/* 0: default, -1: force off, +1: force on */
+	int bind_key_ifindex;
 
 	/* expected addresses and device index for connection */
+	const char *expected_dev;
+	const char *expected_server_dev;
 	int expected_ifindex;
 
 	/* local address */
+	const char *expected_laddr_str;
 	union {
 		struct in_addr  in;
 		struct in6_addr in6;
 	} expected_laddr;
 
 	/* remote address */
+	const char *expected_raddr_str;
 	union {
 		struct in_addr  in;
 		struct in6_addr in6;
 	} expected_raddr;
+
+	/* ESP in UDP encap test */
+	int use_xfrm;
+
+	/* use send() and connect() instead of sendto */
+	int datagram_connect;
 };
 
 static int server_mode;
@@ -186,7 +222,7 @@ static void log_address(const char *desc, struct sockaddr *sa)
 	if (sa->sa_family == AF_INET) {
 		struct sockaddr_in *s = (struct sockaddr_in *) sa;
 
-		log_msg("%s %s:%d",
+		log_msg("%s %s:%d\n",
 			desc,
 			inet_ntop(AF_INET, &s->sin_addr, addrstr,
 				  sizeof(addrstr)),
@@ -195,18 +231,37 @@ static void log_address(const char *desc, struct sockaddr *sa)
 	} else if (sa->sa_family == AF_INET6) {
 		struct sockaddr_in6 *s6 = (struct sockaddr_in6 *) sa;
 
-		log_msg("%s [%s]:%d",
+		log_msg("%s [%s]:%d\n",
 			desc,
 			inet_ntop(AF_INET6, &s6->sin6_addr, addrstr,
 				  sizeof(addrstr)),
 			ntohs(s6->sin6_port));
 	}
 
-	printf("\n");
-
 	fflush(stdout);
 }
 
+static int switch_ns(const char *ns)
+{
+	char path[PATH_MAX];
+	int fd, ret;
+
+	if (geteuid())
+		log_error("warning: likely need root to set netns %s!\n", ns);
+
+	snprintf(path, sizeof(path), "%s%s", NS_PREFIX, ns);
+	fd = open(path, 0);
+	if (fd < 0) {
+		log_err_errno("Failed to open netns path; can not switch netns");
+		return 1;
+	}
+
+	ret = setns(fd, CLONE_NEWNET);
+	close(fd);
+
+	return ret;
+}
+
 static int tcp_md5sig(int sd, void *addr, socklen_t alen, struct sock_args *args)
 {
 	int keylen = strlen(args->password);
@@ -226,11 +281,14 @@ static int tcp_md5sig(int sd, void *addr, socklen_t alen, struct sock_args *args
 	}
 	memcpy(&md5sig.tcpm_addr, addr, alen);
 
-	if (args->ifindex) {
+	if ((args->ifindex && args->bind_key_ifindex >= 0) || args->bind_key_ifindex >= 1) {
 		opt = TCP_MD5SIG_EXT;
 		md5sig.tcpm_flags |= TCP_MD5SIG_FLAG_IFINDEX;
 
 		md5sig.tcpm_ifindex = args->ifindex;
+		log_msg("TCP_MD5SIG_FLAG_IFINDEX set tcpm_ifindex=%d\n", md5sig.tcpm_ifindex);
+	} else {
+		log_msg("TCP_MD5SIG_FLAG_IFINDEX off\n", md5sig.tcpm_ifindex);
 	}
 
 	rc = setsockopt(sd, IPPROTO_TCP, opt, &md5sig, sizeof(md5sig));
@@ -259,13 +317,13 @@ static int tcp_md5_remote(int sd, struct sock_args *args)
 	switch (args->version) {
 	case AF_INET:
 		sin.sin_port = htons(args->port);
-		sin.sin_addr = args->remote_addr.in;
+		sin.sin_addr = args->md5_prefix.v4.sin_addr;
 		addr = &sin;
 		alen = sizeof(sin);
 		break;
 	case AF_INET6:
 		sin6.sin6_port = htons(args->port);
-		sin6.sin6_addr = args->remote_addr.in6;
+		sin6.sin6_addr = args->md5_prefix.v6.sin6_addr;
 		addr = &sin6;
 		alen = sizeof(sin6);
 		break;
@@ -463,6 +521,29 @@ static int set_membership(int sd, uint32_t grp, uint32_t addr, int ifindex)
 	return 0;
 }
 
+static int set_freebind(int sd, int version)
+{
+	unsigned int one = 1;
+	int rc = 0;
+
+	switch (version) {
+	case AF_INET:
+		if (setsockopt(sd, SOL_IP, IP_FREEBIND, &one, sizeof(one))) {
+			log_err_errno("setsockopt(IP_FREEBIND)");
+			rc = -1;
+		}
+		break;
+	case AF_INET6:
+		if (setsockopt(sd, SOL_IPV6, IPV6_FREEBIND, &one, sizeof(one))) {
+			log_err_errno("setsockopt(IPV6_FREEBIND");
+			rc = -1;
+		}
+		break;
+	}
+
+	return rc;
+}
+
 static int set_broadcast(int sd)
 {
 	unsigned int one = 1;
@@ -502,6 +583,48 @@ static int set_reuseaddr(int sd)
 	return rc;
 }
 
+static int set_dsfield(int sd, int version, int dsfield)
+{
+	if (!dsfield)
+		return 0;
+
+	switch (version) {
+	case AF_INET:
+		if (setsockopt(sd, SOL_IP, IP_TOS, &dsfield,
+			       sizeof(dsfield)) < 0) {
+			log_err_errno("setsockopt(IP_TOS)");
+			return -1;
+		}
+		break;
+
+	case AF_INET6:
+		if (setsockopt(sd, SOL_IPV6, IPV6_TCLASS, &dsfield,
+			       sizeof(dsfield)) < 0) {
+			log_err_errno("setsockopt(IPV6_TCLASS)");
+			return -1;
+		}
+		break;
+
+	default:
+		log_error("Invalid address family\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+static int set_dontroute(int sd)
+{
+	unsigned int one = 1;
+
+	if (setsockopt(sd, SOL_SOCKET, SO_DONTROUTE, &one, sizeof(one)) < 0) {
+		log_err_errno("setsockopt(SO_DONTROUTE)");
+		return -1;
+	}
+
+	return 0;
+}
+
 static int str_to_uint(const char *str, int min, int max, unsigned int *value)
 {
 	int number;
@@ -522,6 +645,33 @@ static int str_to_uint(const char *str, int min, int max, unsigned int *value)
 	return -1;
 }
 
+static int resolve_devices(struct sock_args *args)
+{
+	if (args->dev) {
+		args->ifindex = get_ifidx(args->dev);
+		if (args->ifindex < 0) {
+			log_error("Invalid device name\n");
+			return 1;
+		}
+	}
+
+	if (args->expected_dev) {
+		unsigned int tmp;
+
+		if (str_to_uint(args->expected_dev, 0, INT_MAX, &tmp) == 0) {
+			args->expected_ifindex = (int)tmp;
+		} else {
+			args->expected_ifindex = get_ifidx(args->expected_dev);
+			if (args->expected_ifindex < 0) {
+				fprintf(stderr, "Invalid expected device\n");
+				return 1;
+			}
+		}
+	}
+
+	return 0;
+}
+
 static int expected_addr_match(struct sockaddr *sa, void *expected,
 			       const char *desc)
 {
@@ -533,7 +683,7 @@ static int expected_addr_match(struct sockaddr *sa, void *expected,
 		struct in_addr *exp_in = (struct in_addr *) expected;
 
 		if (s->sin_addr.s_addr != exp_in->s_addr) {
-			log_error("%s address does not match expected %s",
+			log_error("%s address does not match expected %s\n",
 				  desc,
 				  inet_ntop(AF_INET, exp_in,
 					    addrstr, sizeof(addrstr)));
@@ -544,14 +694,14 @@ static int expected_addr_match(struct sockaddr *sa, void *expected,
 		struct in6_addr *exp_in = (struct in6_addr *) expected;
 
 		if (memcmp(&s6->sin6_addr, exp_in, sizeof(*exp_in))) {
-			log_error("%s address does not match expected %s",
+			log_error("%s address does not match expected %s\n",
 				  desc,
 				  inet_ntop(AF_INET6, exp_in,
 					    addrstr, sizeof(addrstr)));
 			rc = 1;
 		}
 	} else {
-		log_error("%s address does not match expected - unknown family",
+		log_error("%s address does not match expected - unknown family\n",
 			  desc);
 		rc = 1;
 	}
@@ -599,6 +749,160 @@ static int show_sockstat(int sd, struct sock_args *args)
 	return rc;
 }
 
+enum addr_type {
+	ADDR_TYPE_LOCAL,
+	ADDR_TYPE_REMOTE,
+	ADDR_TYPE_MCAST,
+	ADDR_TYPE_EXPECTED_LOCAL,
+	ADDR_TYPE_EXPECTED_REMOTE,
+	ADDR_TYPE_MD5_PREFIX,
+};
+
+static int convert_addr(struct sock_args *args, const char *_str,
+			enum addr_type atype)
+{
+	int pfx_len_max = args->version == AF_INET6 ? 128 : 32;
+	int family = args->version;
+	char *str, *dev, *sep;
+	struct in6_addr *in6;
+	struct in_addr  *in;
+	const char *desc;
+	void *addr;
+	int rc = 0;
+
+	str = strdup(_str);
+	if (!str)
+		return -ENOMEM;
+
+	switch (atype) {
+	case ADDR_TYPE_LOCAL:
+		desc = "local";
+		addr = &args->local_addr;
+		break;
+	case ADDR_TYPE_REMOTE:
+		desc = "remote";
+		addr = &args->remote_addr;
+		break;
+	case ADDR_TYPE_MCAST:
+		desc = "mcast grp";
+		addr = &args->grp;
+		break;
+	case ADDR_TYPE_EXPECTED_LOCAL:
+		desc = "expected local";
+		addr = &args->expected_laddr;
+		break;
+	case ADDR_TYPE_EXPECTED_REMOTE:
+		desc = "expected remote";
+		addr = &args->expected_raddr;
+		break;
+	case ADDR_TYPE_MD5_PREFIX:
+		desc = "md5 prefix";
+		if (family == AF_INET) {
+			args->md5_prefix.v4.sin_family = AF_INET;
+			addr = &args->md5_prefix.v4.sin_addr;
+		} else if (family == AF_INET6) {
+			args->md5_prefix.v6.sin6_family = AF_INET6;
+			addr = &args->md5_prefix.v6.sin6_addr;
+		} else
+			return 1;
+
+		sep = strchr(str, '/');
+		if (sep) {
+			*sep = '\0';
+			sep++;
+			if (str_to_uint(sep, 1, pfx_len_max,
+					&args->prefix_len) != 0) {
+				fprintf(stderr, "Invalid port\n");
+				return 1;
+			}
+		} else {
+			args->prefix_len = 0;
+		}
+		break;
+	default:
+		log_error("unknown address type\n");
+		exit(1);
+	}
+
+	switch (family) {
+	case AF_INET:
+		in  = (struct in_addr *) addr;
+		if (str) {
+			if (inet_pton(AF_INET, str, in) == 0) {
+				log_error("Invalid %s IP address\n", desc);
+				rc = -1;
+				goto out;
+			}
+		} else {
+			in->s_addr = htonl(INADDR_ANY);
+		}
+		break;
+
+	case AF_INET6:
+		dev = strchr(str, '%');
+		if (dev) {
+			*dev = '\0';
+			dev++;
+		}
+
+		in6 = (struct in6_addr *) addr;
+		if (str) {
+			if (inet_pton(AF_INET6, str, in6) == 0) {
+				log_error("Invalid %s IPv6 address\n", desc);
+				rc = -1;
+				goto out;
+			}
+		} else {
+			*in6 = in6addr_any;
+		}
+		if (dev) {
+			args->scope_id = get_ifidx(dev);
+			if (args->scope_id < 0) {
+				log_error("Invalid scope on %s IPv6 address\n",
+					  desc);
+				rc = -1;
+				goto out;
+			}
+		}
+		break;
+
+	default:
+		log_error("Invalid address family\n");
+	}
+
+out:
+	free(str);
+	return rc;
+}
+
+static int validate_addresses(struct sock_args *args)
+{
+	if (args->local_addr_str &&
+	    convert_addr(args, args->local_addr_str, ADDR_TYPE_LOCAL) < 0)
+		return 1;
+
+	if (args->remote_addr_str &&
+	    convert_addr(args, args->remote_addr_str, ADDR_TYPE_REMOTE) < 0)
+		return 1;
+
+	if (args->md5_prefix_str &&
+	    convert_addr(args, args->md5_prefix_str,
+			 ADDR_TYPE_MD5_PREFIX) < 0)
+		return 1;
+
+	if (args->expected_laddr_str &&
+	    convert_addr(args, args->expected_laddr_str,
+			 ADDR_TYPE_EXPECTED_LOCAL))
+		return 1;
+
+	if (args->expected_raddr_str &&
+	    convert_addr(args, args->expected_raddr_str,
+			 ADDR_TYPE_EXPECTED_REMOTE))
+		return 1;
+
+	return 0;
+}
+
 static int get_index_from_cmsg(struct msghdr *m)
 {
 	struct cmsghdr *cm;
@@ -723,6 +1027,11 @@ static int send_msg(int sd, void *addr, socklen_t alen, struct sock_args *args)
 			log_err_errno("write failed sending msg to peer");
 			return 1;
 		}
+	} else if (args->datagram_connect) {
+		if (send(sd, msg, msglen, 0) < 0) {
+			log_err_errno("send failed sending msg to peer");
+			return 1;
+		}
 	} else if (args->ifindex && args->use_cmsg) {
 		if (send_msg_cmsg(sd, addr, alen, args->ifindex, args->version))
 			return 1;
@@ -1053,6 +1362,17 @@ static int msock_init(struct sock_args *args, int server)
 		       (char *)&one, sizeof(one)) < 0)
 		log_err_errno("Setting SO_BROADCAST error");
 
+	if (set_dsfield(sd, AF_INET, args->dsfield) != 0)
+		goto out_err;
+
+	if (server) {
+		if (args->server_dontroute && set_dontroute(sd) != 0)
+			goto out_err;
+	} else {
+		if (args->client_dontroute && set_dontroute(sd) != 0)
+			goto out_err;
+	}
+
 	if (args->dev && bind_to_device(sd, args->dev) != 0)
 		goto out_err;
 	else if (args->use_setsockopt &&
@@ -1129,6 +1449,41 @@ static int bind_socket(int sd, struct sock_args *args)
 	return 0;
 }
 
+static int config_xfrm_policy(int sd, struct sock_args *args)
+{
+	struct xfrm_userpolicy_info policy = {};
+	int type = UDP_ENCAP_ESPINUDP;
+	int xfrm_af = IP_XFRM_POLICY;
+	int level = SOL_IP;
+
+	if (args->type != SOCK_DGRAM) {
+		log_error("Invalid socket type. Only DGRAM could be used for XFRM\n");
+		return 1;
+	}
+
+	policy.action = XFRM_POLICY_ALLOW;
+	policy.sel.family = args->version;
+	if (args->version == AF_INET6) {
+		xfrm_af = IPV6_XFRM_POLICY;
+		level = SOL_IPV6;
+	}
+
+	policy.dir = XFRM_POLICY_OUT;
+	if (setsockopt(sd, level, xfrm_af, &policy, sizeof(policy)) < 0)
+		return 1;
+
+	policy.dir = XFRM_POLICY_IN;
+	if (setsockopt(sd, level, xfrm_af, &policy, sizeof(policy)) < 0)
+		return 1;
+
+	if (setsockopt(sd, IPPROTO_UDP, UDP_ENCAP, &type, sizeof(type)) < 0) {
+		log_err_errno("Failed to set xfrm encap");
+		return 1;
+	}
+
+	return 0;
+}
+
 static int lsock_init(struct sock_args *args)
 {
 	long flags;
@@ -1146,12 +1501,21 @@ static int lsock_init(struct sock_args *args)
 	if (set_reuseport(sd) != 0)
 		goto err;
 
+	if (set_dsfield(sd, args->version, args->dsfield) != 0)
+		goto err;
+
+	if (args->server_dontroute && set_dontroute(sd) != 0)
+		goto err;
+
 	if (args->dev && bind_to_device(sd, args->dev) != 0)
 		goto err;
 	else if (args->use_setsockopt &&
 		 set_unicast_if(sd, args->ifindex, args->version))
 		goto err;
 
+	if (args->use_freebind && set_freebind(sd, args->version))
+		goto err;
+
 	if (bind_socket(sd, args))
 		goto err;
 
@@ -1172,6 +1536,11 @@ static int lsock_init(struct sock_args *args)
 	if (fcntl(sd, F_SETFD, FD_CLOEXEC) < 0)
 		log_err_errno("Failed to set close-on-exec flag");
 
+	if (args->use_xfrm && config_xfrm_policy(sd, args)) {
+		log_err_errno("Failed to set xfrm policy");
+		goto err;
+	}
+
 out:
 	return sd;
 
@@ -1180,8 +1549,19 @@ err:
 	return -1;
 }
 
-static int do_server(struct sock_args *args)
+static void ipc_write(int fd, int message)
+{
+	/* Not in both_mode, so there's no process to signal */
+	if (fd < 0)
+		return;
+
+	if (write(fd, &message, sizeof(message)) < 0)
+		log_err_errno("Failed to send client status");
+}
+
+static int do_server(struct sock_args *args, int ipc_fd)
 {
+	/* ipc_fd = -1 if no parent process to signal */
 	struct timeval timeout = { .tv_sec = prog_timeout }, *ptval = NULL;
 	unsigned char addr[sizeof(struct sockaddr_in6)] = {};
 	socklen_t alen = sizeof(addr);
@@ -1190,6 +1570,20 @@ static int do_server(struct sock_args *args)
 	fd_set rfds;
 	int rc;
 
+	if (args->serverns) {
+		if (switch_ns(args->serverns)) {
+			log_error("Could not set server netns to %s\n",
+				  args->serverns);
+			goto err_exit;
+		}
+		log_msg("Switched server netns\n");
+	}
+
+	args->dev = args->server_dev;
+	args->expected_dev = args->expected_server_dev;
+	if (resolve_devices(args) || validate_addresses(args))
+		goto err_exit;
+
 	if (prog_timeout)
 		ptval = &timeout;
 
@@ -1199,14 +1593,16 @@ static int do_server(struct sock_args *args)
 		lsd = lsock_init(args);
 
 	if (lsd < 0)
-		return 1;
+		goto err_exit;
 
 	if (args->bind_test_only) {
 		close(lsd);
+		ipc_write(ipc_fd, 1);
 		return 0;
 	}
 
 	if (args->type != SOCK_STREAM) {
+		ipc_write(ipc_fd, 1);
 		rc = msg_loop(0, lsd, (void *) addr, alen, args);
 		close(lsd);
 		return rc;
@@ -1214,11 +1610,11 @@ static int do_server(struct sock_args *args)
 
 	if (args->password && tcp_md5_remote(lsd, args)) {
 		close(lsd);
-		return 1;
+		goto err_exit;
 	}
 
+	ipc_write(ipc_fd, 1);
 	while (1) {
-		log_msg("\n");
 		log_msg("waiting for client connection.\n");
 		FD_ZERO(&rfds);
 		FD_SET(lsd, &rfds);
@@ -1264,6 +1660,9 @@ static int do_server(struct sock_args *args)
 	close(lsd);
 
 	return rc;
+err_exit:
+	ipc_write(ipc_fd, 0);
+	return 1;
 }
 
 static int wait_for_connect(int sd)
@@ -1321,6 +1720,12 @@ static int connectsock(void *addr, socklen_t alen, struct sock_args *args)
 	if (set_reuseport(sd) != 0)
 		goto err;
 
+	if (set_dsfield(sd, args->version, args->dsfield) != 0)
+		goto err;
+
+	if (args->client_dontroute && set_dontroute(sd) != 0)
+		goto err;
+
 	if (args->dev && bind_to_device(sd, args->dev) != 0)
 		goto err;
 	else if (args->use_setsockopt &&
@@ -1330,7 +1735,7 @@ static int connectsock(void *addr, socklen_t alen, struct sock_args *args)
 	if (args->has_local_ip && bind_socket(sd, args))
 		goto err;
 
-	if (args->type != SOCK_STREAM)
+	if (args->type != SOCK_STREAM && !args->datagram_connect)
 		goto out;
 
 	if (args->password && tcp_md5sig(sd, addr, alen, args))
@@ -1375,6 +1780,26 @@ static int do_client(struct sock_args *args)
 		return 1;
 	}
 
+	if (args->clientns) {
+		if (switch_ns(args->clientns)) {
+			log_error("Could not set client netns to %s\n",
+				  args->clientns);
+			return 1;
+		}
+		log_msg("Switched client netns\n");
+	}
+
+	args->local_addr_str = args->client_local_addr_str;
+	if (resolve_devices(args) || validate_addresses(args))
+		return 1;
+
+	if ((args->use_setsockopt || args->use_cmsg) && !args->ifindex) {
+		fprintf(stderr, "Device binding not specified\n");
+		return 1;
+	}
+	if (args->use_setsockopt || args->use_cmsg)
+		args->dev = NULL;
+
 	switch (args->version) {
 	case AF_INET:
 		sin.sin_port = htons(args->port);
@@ -1394,6 +1819,8 @@ static int do_client(struct sock_args *args)
 		break;
 	}
 
+	args->password = args->client_pw;
+
 	if (args->has_grp)
 		sd = msock_client(args);
 	else
@@ -1419,132 +1846,6 @@ out:
 	return rc;
 }
 
-enum addr_type {
-	ADDR_TYPE_LOCAL,
-	ADDR_TYPE_REMOTE,
-	ADDR_TYPE_MCAST,
-	ADDR_TYPE_EXPECTED_LOCAL,
-	ADDR_TYPE_EXPECTED_REMOTE,
-	ADDR_TYPE_MD5_PREFIX,
-};
-
-static int convert_addr(struct sock_args *args, const char *_str,
-			enum addr_type atype)
-{
-	int pfx_len_max = args->version == AF_INET6 ? 128 : 32;
-	int family = args->version;
-	char *str, *dev, *sep;
-	struct in6_addr *in6;
-	struct in_addr  *in;
-	const char *desc;
-	void *addr;
-	int rc = 0;
-
-	str = strdup(_str);
-	if (!str)
-		return -ENOMEM;
-
-	switch (atype) {
-	case ADDR_TYPE_LOCAL:
-		desc = "local";
-		addr = &args->local_addr;
-		break;
-	case ADDR_TYPE_REMOTE:
-		desc = "remote";
-		addr = &args->remote_addr;
-		break;
-	case ADDR_TYPE_MCAST:
-		desc = "mcast grp";
-		addr = &args->grp;
-		break;
-	case ADDR_TYPE_EXPECTED_LOCAL:
-		desc = "expected local";
-		addr = &args->expected_laddr;
-		break;
-	case ADDR_TYPE_EXPECTED_REMOTE:
-		desc = "expected remote";
-		addr = &args->expected_raddr;
-		break;
-	case ADDR_TYPE_MD5_PREFIX:
-		desc = "md5 prefix";
-		if (family == AF_INET) {
-			args->md5_prefix.v4.sin_family = AF_INET;
-			addr = &args->md5_prefix.v4.sin_addr;
-		} else if (family == AF_INET6) {
-			args->md5_prefix.v6.sin6_family = AF_INET6;
-			addr = &args->md5_prefix.v6.sin6_addr;
-		} else
-			return 1;
-
-		sep = strchr(str, '/');
-		if (sep) {
-			*sep = '\0';
-			sep++;
-			if (str_to_uint(sep, 1, pfx_len_max,
-					&args->prefix_len) != 0) {
-				fprintf(stderr, "Invalid port\n");
-				return 1;
-			}
-		} else {
-			args->prefix_len = pfx_len_max;
-		}
-		break;
-	default:
-		log_error("unknown address type");
-		exit(1);
-	}
-
-	switch (family) {
-	case AF_INET:
-		in  = (struct in_addr *) addr;
-		if (str) {
-			if (inet_pton(AF_INET, str, in) == 0) {
-				log_error("Invalid %s IP address\n", desc);
-				rc = -1;
-				goto out;
-			}
-		} else {
-			in->s_addr = htonl(INADDR_ANY);
-		}
-		break;
-
-	case AF_INET6:
-		dev = strchr(str, '%');
-		if (dev) {
-			*dev = '\0';
-			dev++;
-		}
-
-		in6 = (struct in6_addr *) addr;
-		if (str) {
-			if (inet_pton(AF_INET6, str, in6) == 0) {
-				log_error("Invalid %s IPv6 address\n", desc);
-				rc = -1;
-				goto out;
-			}
-		} else {
-			*in6 = in6addr_any;
-		}
-		if (dev) {
-			args->scope_id = get_ifidx(dev);
-			if (args->scope_id < 0) {
-				log_error("Invalid scope on %s IPv6 address\n",
-					  desc);
-				rc = -1;
-				goto out;
-			}
-		}
-		break;
-
-	default:
-		log_error("Invalid address family\n");
-	}
-
-out:
-	free(str);
-	return rc;
-}
-
 static char *random_msg(int len)
 {
 	int i, n = 0, olen = len + 1;
@@ -1563,12 +1864,86 @@ static char *random_msg(int len)
 		n += i;
 		len -= i;
 	}
-	i = snprintf(m + n, olen - n, "%.*s", len,
-		     "abcdefghijklmnopqrstuvwxyz");
+
+	snprintf(m + n, olen - n, "%.*s", len,
+	     "abcdefghijklmnopqrstuvwxyz");
 	return m;
 }
 
-#define GETOPT_STR  "sr:l:p:t:g:P:DRn:M:m:d:SCi6L:0:1:2:Fbq"
+static int ipc_child(int fd, struct sock_args *args)
+{
+	char *outbuf, *errbuf;
+	int rc = 1;
+
+	outbuf = malloc(4096);
+	errbuf = malloc(4096);
+	if (!outbuf || !errbuf) {
+		fprintf(stderr, "server: Failed to allocate buffers for stdout and stderr\n");
+		goto out;
+	}
+
+	setbuffer(stdout, outbuf, 4096);
+	setbuffer(stderr, errbuf, 4096);
+
+	server_mode = 1; /* to tell log_msg in case we are in both_mode */
+
+	/* when running in both mode, address validation applies
+	 * solely to client side
+	 */
+	args->has_expected_laddr = 0;
+	args->has_expected_raddr = 0;
+
+	rc = do_server(args, fd);
+
+out:
+	free(outbuf);
+	free(errbuf);
+
+	return rc;
+}
+
+static int ipc_parent(int cpid, int fd, struct sock_args *args)
+{
+	int client_status;
+	int status;
+	int buf;
+
+	/* do the client-side function here in the parent process,
+	 * waiting to be told when to continue
+	 */
+	if (read(fd, &buf, sizeof(buf)) <= 0) {
+		log_err_errno("Failed to read IPC status from status");
+		return 1;
+	}
+	if (!buf) {
+		log_error("Server failed; can not continue\n");
+		return 1;
+	}
+	log_msg("Server is ready\n");
+
+	client_status = do_client(args);
+	log_msg("parent is done!\n");
+
+	if (kill(cpid, 0) == 0)
+		kill(cpid, SIGKILL);
+
+	wait(&status);
+	return client_status;
+}
+
+#define GETOPT_STR  "sr:l:c:Q:p:t:g:P:DRn:M:X:m:d:I:BN:O:SUCi6xL:0:1:2:3:Fbqf"
+#define OPT_FORCE_BIND_KEY_IFINDEX 1001
+#define OPT_NO_BIND_KEY_IFINDEX 1002
+#define OPT_CLIENT_DONTROUTE 1003
+#define OPT_SERVER_DONTROUTE 1004
+
+static struct option long_opts[] = {
+	{"force-bind-key-ifindex", 0, 0, OPT_FORCE_BIND_KEY_IFINDEX},
+	{"no-bind-key-ifindex", 0, 0, OPT_NO_BIND_KEY_IFINDEX},
+	{"client-dontroute", 0, 0, OPT_CLIENT_DONTROUTE},
+	{"server-dontroute", 0, 0, OPT_SERVER_DONTROUTE},
+	{0, 0, 0, 0}
+};
 
 static void print_usage(char *prog)
 {
@@ -1582,28 +1957,50 @@ static void print_usage(char *prog)
 	"    -t            timeout seconds (default: none)\n"
 	"\n"
 	"Optional:\n"
+	"    -B            do both client and server via fork and IPC\n"
+	"    -N ns         set client to network namespace ns (requires root)\n"
+	"    -O ns         set server to network namespace ns (requires root)\n"
 	"    -F            Restart server loop\n"
 	"    -6            IPv6 (default is IPv4)\n"
 	"    -P proto      protocol for socket: icmp, ospf (default: none)\n"
 	"    -D|R          datagram (D) / raw (R) socket (default stream)\n"
-	"    -l addr       local address to bind to\n"
+	"    -l addr       local address to bind to in server mode\n"
+	"    -c addr       local address to bind to in client mode\n"
+	"    -Q dsfield    DS Field value of the socket (the IP_TOS or\n"
+	"                  IPV6_TCLASS socket option)\n"
+	"    -x            configure XFRM policy on socket\n"
 	"\n"
 	"    -d dev        bind socket to given device name\n"
+	"    -I dev        bind socket to given device name - server mode\n"
 	"    -S            use setsockopt (IP_UNICAST_IF or IP_MULTICAST_IF)\n"
 	"                  to set device binding\n"
+	"    -U            Use connect() and send() for datagram sockets\n"
+	"    -f            bind socket with the IP[V6]_FREEBIND option\n"
 	"    -C            use cmsg and IP_PKTINFO to specify device binding\n"
 	"\n"
 	"    -L len        send random message of given length\n"
 	"    -n num        number of times to send message\n"
 	"\n"
 	"    -M password   use MD5 sum protection\n"
+	"    -X password   MD5 password for client mode\n"
 	"    -m prefix/len prefix and length to use for MD5 key\n"
+	"    --no-bind-key-ifindex: Force TCP_MD5SIG_FLAG_IFINDEX off\n"
+	"    --force-bind-key-ifindex: Force TCP_MD5SIG_FLAG_IFINDEX on\n"
+	"        (default: only if -I is passed)\n"
+	"    --client-dontroute: don't use gateways for client socket: send\n"
+	"                        packets only if destination is on link (see\n"
+	"                        SO_DONTROUTE in socket(7))\n"
+	"    --server-dontroute: don't use gateways for server socket: send\n"
+	"                        packets only if destination is on link (see\n"
+	"                        SO_DONTROUTE in socket(7))\n"
+	"\n"
 	"    -g grp        multicast group (e.g., 239.1.1.1)\n"
 	"    -i            interactive mode (default is echo and terminate)\n"
 	"\n"
 	"    -0 addr       Expected local address\n"
 	"    -1 addr       Expected remote address\n"
 	"    -2 dev        Expected device name (or index) to receive packet\n"
+	"    -3 dev        Expected device name (or index) to receive packets - server mode\n"
 	"\n"
 	"    -b            Bind test only.\n"
 	"    -q            Be quiet. Run test without printing anything.\n"
@@ -1618,8 +2015,11 @@ int main(int argc, char *argv[])
 		.port    = DEFAULT_PORT,
 	};
 	struct protoent *pe;
+	int both_mode = 0;
 	unsigned int tmp;
 	int forever = 0;
+	int fd[2];
+	int cpid;
 
 	/* process inputs */
 	extern char *optarg;
@@ -1629,8 +2029,11 @@ int main(int argc, char *argv[])
 	 * process input args
 	 */
 
-	while ((rc = getopt(argc, argv, GETOPT_STR)) != -1) {
+	while ((rc = getopt_long(argc, argv, GETOPT_STR, long_opts, NULL)) != -1) {
 		switch (rc) {
+		case 'B':
+			both_mode = 1;
+			break;
 		case 's':
 			server_mode = 1;
 			break;
@@ -1639,13 +2042,22 @@ int main(int argc, char *argv[])
 			break;
 		case 'l':
 			args.has_local_ip = 1;
-			if (convert_addr(&args, optarg, ADDR_TYPE_LOCAL) < 0)
-				return 1;
+			args.local_addr_str = optarg;
 			break;
 		case 'r':
 			args.has_remote_ip = 1;
-			if (convert_addr(&args, optarg, ADDR_TYPE_REMOTE) < 0)
+			args.remote_addr_str = optarg;
+			break;
+		case 'c':
+			args.has_local_ip = 1;
+			args.client_local_addr_str = optarg;
+			break;
+		case 'Q':
+			if (str_to_uint(optarg, 0, 255, &tmp) != 0) {
+				fprintf(stderr, "Invalid DS Field\n");
 				return 1;
+			}
+			args.dsfield = tmp;
 			break;
 		case 'p':
 			if (str_to_uint(optarg, 1, 65535, &tmp) != 0) {
@@ -1685,29 +2097,50 @@ int main(int argc, char *argv[])
 		case 'n':
 			iter = atoi(optarg);
 			break;
+		case 'N':
+			args.clientns = optarg;
+			break;
+		case 'O':
+			args.serverns = optarg;
+			break;
 		case 'L':
 			msg = random_msg(atoi(optarg));
 			break;
 		case 'M':
 			args.password = optarg;
 			break;
+		case OPT_FORCE_BIND_KEY_IFINDEX:
+			args.bind_key_ifindex = 1;
+			break;
+		case OPT_NO_BIND_KEY_IFINDEX:
+			args.bind_key_ifindex = -1;
+			break;
+		case OPT_CLIENT_DONTROUTE:
+			args.client_dontroute = 1;
+			break;
+		case OPT_SERVER_DONTROUTE:
+			args.server_dontroute = 1;
+			break;
+		case 'X':
+			args.client_pw = optarg;
+			break;
 		case 'm':
-			if (convert_addr(&args, optarg, ADDR_TYPE_MD5_PREFIX) < 0)
-				return 1;
+			args.md5_prefix_str = optarg;
 			break;
 		case 'S':
 			args.use_setsockopt = 1;
 			break;
+		case 'f':
+			args.use_freebind = 1;
+			break;
 		case 'C':
 			args.use_cmsg = 1;
 			break;
 		case 'd':
 			args.dev = optarg;
-			args.ifindex = get_ifidx(optarg);
-			if (args.ifindex < 0) {
-				fprintf(stderr, "Invalid device name\n");
-				return 1;
-			}
+			break;
+		case 'I':
+			args.server_dev = optarg;
 			break;
 		case 'i':
 			interactive = 1;
@@ -1726,32 +2159,27 @@ int main(int argc, char *argv[])
 			break;
 		case '0':
 			args.has_expected_laddr = 1;
-			if (convert_addr(&args, optarg,
-					 ADDR_TYPE_EXPECTED_LOCAL))
-				return 1;
+			args.expected_laddr_str = optarg;
 			break;
 		case '1':
 			args.has_expected_raddr = 1;
-			if (convert_addr(&args, optarg,
-					 ADDR_TYPE_EXPECTED_REMOTE))
-				return 1;
-
+			args.expected_raddr_str = optarg;
 			break;
 		case '2':
-			if (str_to_uint(optarg, 0, INT_MAX, &tmp) == 0) {
-				args.expected_ifindex = (int)tmp;
-			} else {
-				args.expected_ifindex = get_ifidx(optarg);
-				if (args.expected_ifindex < 0) {
-					fprintf(stderr,
-						"Invalid expected device\n");
-					return 1;
-				}
-			}
+			args.expected_dev = optarg;
+			break;
+		case '3':
+			args.expected_server_dev = optarg;
 			break;
 		case 'q':
 			quiet = 1;
 			break;
+		case 'x':
+			args.use_xfrm = 1;
+			break;
+		case 'U':
+			args.datagram_connect = 1;
+			break;
 		default:
 			print_usage(argv[0]);
 			return 1;
@@ -1759,23 +2187,17 @@ int main(int argc, char *argv[])
 	}
 
 	if (args.password &&
-	    ((!args.has_remote_ip && !args.prefix_len) || args.type != SOCK_STREAM)) {
+	    ((!args.has_remote_ip && !args.md5_prefix_str) ||
+	      args.type != SOCK_STREAM)) {
 		log_error("MD5 passwords apply to TCP only and require a remote ip for the password\n");
 		return 1;
 	}
 
-	if (args.prefix_len && !args.password) {
+	if (args.md5_prefix_str && !args.password) {
 		log_error("Prefix range for MD5 protection specified without a password\n");
 		return 1;
 	}
 
-	if ((args.use_setsockopt || args.use_cmsg) && !args.ifindex) {
-		fprintf(stderr, "Device binding not specified\n");
-		return 1;
-	}
-	if (args.use_setsockopt || args.use_cmsg)
-		args.dev = NULL;
-
 	if (iter == 0) {
 		fprintf(stderr, "Invalid number of messages to send\n");
 		return 1;
@@ -1792,7 +2214,7 @@ int main(int argc, char *argv[])
 		return 1;
 	}
 
-	if (!server_mode && !args.has_grp &&
+	if ((both_mode || !server_mode) && !args.has_grp &&
 	    !args.has_remote_ip && !args.has_local_ip) {
 		fprintf(stderr,
 			"Local (server mode) or remote IP (client IP) required\n");
@@ -1804,9 +2226,26 @@ int main(int argc, char *argv[])
 		msg = NULL;
 	}
 
+	if (both_mode) {
+		if (pipe(fd) < 0) {
+			perror("pipe");
+			exit(1);
+		}
+
+		cpid = fork();
+		if (cpid < 0) {
+			perror("fork");
+			exit(1);
+		}
+		if (cpid)
+			return ipc_parent(cpid, fd[0], &args);
+
+		return ipc_child(fd[1], &args);
+	}
+
 	if (server_mode) {
 		do {
-			rc = do_server(&args);
+			rc = do_server(&args, -1);
 		} while (forever);
 
 		return rc;
diff --git a/tools/testing/selftests/net/openvswitch/Makefile b/tools/testing/selftests/net/openvswitch/Makefile
new file mode 100644
index 000000000000..2f1508abc826
--- /dev/null
+++ b/tools/testing/selftests/net/openvswitch/Makefile
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0
+
+top_srcdir = ../../../../..
+
+CFLAGS =  -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES)
+
+TEST_PROGS := openvswitch.sh
+
+TEST_FILES := ovs-dpctl.py
+
+EXTRA_CLEAN := test_netlink_checks
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh
new file mode 100755
index 000000000000..5cae53543849
--- /dev/null
+++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh
@@ -0,0 +1,722 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# OVS kernel module self tests
+
+trap ovs_exit_sig EXIT TERM INT ERR
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+PAUSE_ON_FAIL=no
+VERBOSE=0
+TRACING=0
+
+tests="
+	arp_ping				eth-arp: Basic arp ping between two NS
+	ct_connect_v4				ip4-ct-xon: Basic ipv4 tcp connection using ct
+	connect_v4				ip4-xon: Basic ipv4 ping between two NS
+	nat_connect_v4				ip4-nat-xon: Basic ipv4 tcp connection via NAT
+	nat_related_v4				ip4-nat-related: ICMP related matches work with SNAT
+	netlink_checks				ovsnl: validate netlink attrs and settings
+	upcall_interfaces			ovs: test the upcall interfaces
+	drop_reason				drop: test drop reasons are emitted"
+
+info() {
+    [ $VERBOSE = 0 ] || echo $*
+}
+
+ovs_base=`pwd`
+sbxs=
+sbx_add () {
+	info "adding sandbox '$1'"
+
+	sbxs="$sbxs $1"
+
+	NO_BIN=0
+
+	# Create sandbox.
+	local d="$ovs_base"/$1
+	if [ -e $d ]; then
+		info "removing $d"
+		rm -rf "$d"
+	fi
+	mkdir "$d" || return 1
+	ovs_setenv $1
+}
+
+ovs_exit_sig() {
+	[ -e ${ovs_dir}/cleanup ] && . "$ovs_dir/cleanup"
+}
+
+on_exit() {
+	echo "$1" > ${ovs_dir}/cleanup.tmp
+	cat ${ovs_dir}/cleanup >> ${ovs_dir}/cleanup.tmp
+	mv ${ovs_dir}/cleanup.tmp ${ovs_dir}/cleanup
+}
+
+ovs_setenv() {
+	sandbox=$1
+
+	ovs_dir=$ovs_base${1:+/$1}; export ovs_dir
+
+	test -e ${ovs_dir}/cleanup || : > ${ovs_dir}/cleanup
+}
+
+ovs_sbx() {
+	if test "X$2" != X; then
+		(ovs_setenv $1; shift; "$@" >> ${ovs_dir}/debug.log)
+	else
+		ovs_setenv $1
+	fi
+}
+
+ovs_add_dp () {
+	info "Adding DP/Bridge IF: sbx:$1 dp:$2 {$3, $4, $5}"
+	sbxname="$1"
+	shift
+	ovs_sbx "$sbxname" python3 $ovs_base/ovs-dpctl.py add-dp $*
+	on_exit "ovs_sbx $sbxname python3 $ovs_base/ovs-dpctl.py del-dp $1;"
+}
+
+ovs_add_if () {
+	info "Adding IF to DP: br:$2 if:$3"
+	if [ "$4" != "-u" ]; then
+		ovs_sbx "$1" python3 $ovs_base/ovs-dpctl.py add-if "$2" "$3" \
+		    || return 1
+	else
+		python3 $ovs_base/ovs-dpctl.py add-if \
+		    -u "$2" "$3" >$ovs_dir/$3.out 2>$ovs_dir/$3.err &
+		pid=$!
+		on_exit "ovs_sbx $1 kill -TERM $pid 2>/dev/null"
+	fi
+}
+
+ovs_del_if () {
+	info "Deleting IF from DP: br:$2 if:$3"
+	ovs_sbx "$1" python3 $ovs_base/ovs-dpctl.py del-if "$2" "$3" || return 1
+}
+
+ovs_netns_spawn_daemon() {
+	sbx=$1
+	shift
+	netns=$1
+	shift
+	info "spawning cmd: $*"
+	ip netns exec $netns $*  >> $ovs_dir/stdout  2>> $ovs_dir/stderr &
+	pid=$!
+	ovs_sbx "$sbx" on_exit "kill -TERM $pid 2>/dev/null"
+}
+
+ovs_add_netns_and_veths () {
+	info "Adding netns attached: sbx:$1 dp:$2 {$3, $4, $5}"
+	ovs_sbx "$1" ip netns add "$3" || return 1
+	on_exit "ovs_sbx $1 ip netns del $3"
+	ovs_sbx "$1" ip link add "$4" type veth peer name "$5" || return 1
+	on_exit "ovs_sbx $1 ip link del $4 >/dev/null 2>&1"
+	ovs_sbx "$1" ip link set "$4" up || return 1
+	ovs_sbx "$1" ip link set "$5" netns "$3" || return 1
+	ovs_sbx "$1" ip netns exec "$3" ip link set "$5" up || return 1
+
+	if [ "$6" != "" ]; then
+		ovs_sbx "$1" ip netns exec "$3" ip addr add "$6" dev "$5" \
+		    || return 1
+	fi
+
+	if [ "$7" != "-u" ]; then
+		ovs_add_if "$1" "$2" "$4" || return 1
+	else
+		ovs_add_if "$1" "$2" "$4" -u || return 1
+	fi
+
+	[ $TRACING -eq 1 ] && ovs_netns_spawn_daemon "$1" "$ns" \
+			tcpdump -i any -s 65535
+
+	return 0
+}
+
+ovs_add_flow () {
+	info "Adding flow to DP: sbx:$1 br:$2 flow:$3 act:$4"
+	ovs_sbx "$1" python3 $ovs_base/ovs-dpctl.py add-flow "$2" "$3" "$4"
+	if [ $? -ne 0 ]; then
+		echo "Flow [ $3 : $4 ] failed" >> ${ovs_dir}/debug.log
+		return 1
+	fi
+	return 0
+}
+
+ovs_del_flows () {
+	info "Deleting all flows from DP: sbx:$1 br:$2"
+	ovs_sbx "$1" python3 $ovs_base/ovs-dpctl.py del-flows "$2"
+	return 0
+}
+
+ovs_drop_record_and_run () {
+	local sbx=$1
+	shift
+
+	perf record -a -q -e skb:kfree_skb -o ${ovs_dir}/perf.data $* \
+		>> ${ovs_dir}/stdout 2>> ${ovs_dir}/stderr
+	return $?
+}
+
+ovs_drop_reason_count()
+{
+	local reason=$1
+
+	local perf_output=`perf script -i ${ovs_dir}/perf.data -F trace:event,trace`
+	local pattern="skb:kfree_skb:.*reason: $reason"
+
+	return `echo "$perf_output" | grep "$pattern" | wc -l`
+}
+
+usage() {
+	echo
+	echo "$0 [OPTIONS] [TEST]..."
+	echo "If no TEST argument is given, all tests will be run."
+	echo
+	echo "Options"
+	echo "  -t: capture traffic via tcpdump"
+	echo "  -v: verbose"
+	echo "  -p: pause on failure"
+	echo
+	echo "Available tests${tests}"
+	exit 1
+}
+
+# drop_reason test
+# - drop packets and verify the right drop reason is reported
+test_drop_reason() {
+	which perf >/dev/null 2>&1 || return $ksft_skip
+
+	sbx_add "test_drop_reason" || return $?
+
+	ovs_add_dp "test_drop_reason" dropreason || return 1
+
+	info "create namespaces"
+	for ns in client server; do
+		ovs_add_netns_and_veths "test_drop_reason" "dropreason" "$ns" \
+			"${ns:0:1}0" "${ns:0:1}1" || return 1
+	done
+
+	# Setup client namespace
+	ip netns exec client ip addr add 172.31.110.10/24 dev c1
+	ip netns exec client ip link set c1 up
+
+	# Setup server namespace
+	ip netns exec server ip addr add 172.31.110.20/24 dev s1
+	ip netns exec server ip link set s1 up
+
+	# Check if drop reasons can be sent
+	ovs_add_flow "test_drop_reason" dropreason \
+		'in_port(1),eth(),eth_type(0x0806),arp()' 'drop(10)' 2>/dev/null
+	if [ $? == 1 ]; then
+		info "no support for drop reasons - skipping"
+		ovs_exit_sig
+		return $ksft_skip
+	fi
+
+	ovs_del_flows "test_drop_reason" dropreason
+
+	# Allow ARP
+	ovs_add_flow "test_drop_reason" dropreason \
+		'in_port(1),eth(),eth_type(0x0806),arp()' '2' || return 1
+	ovs_add_flow "test_drop_reason" dropreason \
+		'in_port(2),eth(),eth_type(0x0806),arp()' '1' || return 1
+
+	# Allow client ICMP traffic but drop return path
+	ovs_add_flow "test_drop_reason" dropreason \
+		"in_port(1),eth(),eth_type(0x0800),ipv4(src=172.31.110.10,proto=1),icmp()" '2'
+	ovs_add_flow "test_drop_reason" dropreason \
+		"in_port(2),eth(),eth_type(0x0800),ipv4(src=172.31.110.20,proto=1),icmp()" 'drop'
+
+	ovs_drop_record_and_run "test_drop_reason" ip netns exec client ping -c 2 172.31.110.20
+	ovs_drop_reason_count 0x30001 # OVS_DROP_FLOW_ACTION
+	if [[ "$?" -ne "2" ]]; then
+		info "Did not detect expected drops: $?"
+		return 1
+	fi
+
+	# Drop UDP 6000 traffic with an explicit action and an error code.
+	ovs_add_flow "test_drop_reason" dropreason \
+		"in_port(1),eth(),eth_type(0x0800),ipv4(src=172.31.110.10,proto=17),udp(dst=6000)" \
+                'drop(42)'
+	# Drop UDP 7000 traffic with an explicit action with no error code.
+	ovs_add_flow "test_drop_reason" dropreason \
+		"in_port(1),eth(),eth_type(0x0800),ipv4(src=172.31.110.10,proto=17),udp(dst=7000)" \
+                'drop(0)'
+
+	ovs_drop_record_and_run \
+            "test_drop_reason" ip netns exec client nc -i 1 -zuv 172.31.110.20 6000
+	ovs_drop_reason_count 0x30004 # OVS_DROP_EXPLICIT_ACTION_ERROR
+	if [[ "$?" -ne "1" ]]; then
+		info "Did not detect expected explicit error drops: $?"
+		return 1
+	fi
+
+	ovs_drop_record_and_run \
+            "test_drop_reason" ip netns exec client nc -i 1 -zuv 172.31.110.20 7000
+	ovs_drop_reason_count 0x30003 # OVS_DROP_EXPLICIT_ACTION
+	if [[ "$?" -ne "1" ]]; then
+		info "Did not detect expected explicit drops: $?"
+		return 1
+	fi
+
+	return 0
+}
+
+# arp_ping test
+# - client has 1500 byte MTU
+# - server has 1500 byte MTU
+# - send ARP ping between two ns
+test_arp_ping () {
+
+	which arping >/dev/null 2>&1 || return $ksft_skip
+
+	sbx_add "test_arp_ping" || return $?
+
+	ovs_add_dp "test_arp_ping" arpping || return 1
+
+	info "create namespaces"
+	for ns in client server; do
+		ovs_add_netns_and_veths "test_arp_ping" "arpping" "$ns" \
+		    "${ns:0:1}0" "${ns:0:1}1" || return 1
+	done
+
+	# Setup client namespace
+	ip netns exec client ip addr add 172.31.110.10/24 dev c1
+	ip netns exec client ip link set c1 up
+	HW_CLIENT=`ip netns exec client ip link show dev c1 | grep -E 'link/ether [0-9a-f:]+' | awk '{print $2;}'`
+	info "Client hwaddr: $HW_CLIENT"
+
+	# Setup server namespace
+	ip netns exec server ip addr add 172.31.110.20/24 dev s1
+	ip netns exec server ip link set s1 up
+	HW_SERVER=`ip netns exec server ip link show dev s1 | grep -E 'link/ether [0-9a-f:]+' | awk '{print $2;}'`
+	info "Server hwaddr: $HW_SERVER"
+
+	ovs_add_flow "test_arp_ping" arpping \
+		"in_port(1),eth(),eth_type(0x0806),arp(sip=172.31.110.10,tip=172.31.110.20,sha=$HW_CLIENT,tha=ff:ff:ff:ff:ff:ff)" '2' || return 1
+	ovs_add_flow "test_arp_ping" arpping \
+		"in_port(2),eth(),eth_type(0x0806),arp()" '1' || return 1
+
+	ovs_sbx "test_arp_ping" ip netns exec client arping -I c1 172.31.110.20 -c 1 || return 1
+
+	return 0
+}
+
+# ct_connect_v4 test
+#  - client has 1500 byte MTU
+#  - server has 1500 byte MTU
+#  - use ICMP to ping in each direction
+#  - only allow CT state stuff to pass through new in c -> s
+test_ct_connect_v4 () {
+
+	which nc >/dev/null 2>/dev/null || return $ksft_skip
+
+	sbx_add "test_ct_connect_v4" || return $?
+
+	ovs_add_dp "test_ct_connect_v4" ct4 || return 1
+	info "create namespaces"
+	for ns in client server; do
+		ovs_add_netns_and_veths "test_ct_connect_v4" "ct4" "$ns" \
+		    "${ns:0:1}0" "${ns:0:1}1" || return 1
+	done
+
+	ip netns exec client ip addr add 172.31.110.10/24 dev c1
+	ip netns exec client ip link set c1 up
+	ip netns exec server ip addr add 172.31.110.20/24 dev s1
+	ip netns exec server ip link set s1 up
+
+	# Add forwarding for ARP and ip packets - completely wildcarded
+	ovs_add_flow "test_ct_connect_v4" ct4 \
+		'in_port(1),eth(),eth_type(0x0806),arp()' '2' || return 1
+	ovs_add_flow "test_ct_connect_v4" ct4 \
+		'in_port(2),eth(),eth_type(0x0806),arp()' '1' || return 1
+	ovs_add_flow "test_ct_connect_v4" ct4 \
+		     'ct_state(-trk),eth(),eth_type(0x0800),ipv4()' \
+		     'ct(commit),recirc(0x1)' || return 1
+	ovs_add_flow "test_ct_connect_v4" ct4 \
+		     'recirc_id(0x1),ct_state(+trk+new),in_port(1),eth(),eth_type(0x0800),ipv4(src=172.31.110.10)' \
+		     '2' || return 1
+	ovs_add_flow "test_ct_connect_v4" ct4 \
+		     'recirc_id(0x1),ct_state(+trk+est),in_port(1),eth(),eth_type(0x0800),ipv4(src=172.31.110.10)' \
+		     '2' || return 1
+	ovs_add_flow "test_ct_connect_v4" ct4 \
+		     'recirc_id(0x1),ct_state(+trk+est),in_port(2),eth(),eth_type(0x0800),ipv4(dst=172.31.110.10)' \
+		     '1' || return 1
+	ovs_add_flow "test_ct_connect_v4" ct4 \
+		     'recirc_id(0x1),ct_state(+trk+inv),eth(),eth_type(0x0800),ipv4()' 'drop' || \
+		     return 1
+
+	# do a ping
+	ovs_sbx "test_ct_connect_v4" ip netns exec client ping 172.31.110.20 -c 3 || return 1
+
+	# create an echo server in 'server'
+	echo "server" | \
+		ovs_netns_spawn_daemon "test_ct_connect_v4" "server" \
+				nc -lvnp 4443
+	ovs_sbx "test_ct_connect_v4" ip netns exec client nc -i 1 -zv 172.31.110.20 4443 || return 1
+
+	# Now test in the other direction (should fail)
+	echo "client" | \
+		ovs_netns_spawn_daemon "test_ct_connect_v4" "client" \
+				nc -lvnp 4443
+	ovs_sbx "test_ct_connect_v4" ip netns exec client nc -i 1 -zv 172.31.110.10 4443
+	if [ $? == 0 ]; then
+	   info "ct connect to client was successful"
+	   return 1
+	fi
+
+	info "done..."
+	return 0
+}
+
+# connect_v4 test
+#  - client has 1500 byte MTU
+#  - server has 1500 byte MTU
+#  - use ICMP to ping in each direction
+test_connect_v4 () {
+
+	sbx_add "test_connect_v4" || return $?
+
+	ovs_add_dp "test_connect_v4" cv4 || return 1
+
+	info "create namespaces"
+	for ns in client server; do
+		ovs_add_netns_and_veths "test_connect_v4" "cv4" "$ns" \
+		    "${ns:0:1}0" "${ns:0:1}1" || return 1
+	done
+
+
+	ip netns exec client ip addr add 172.31.110.10/24 dev c1
+	ip netns exec client ip link set c1 up
+	ip netns exec server ip addr add 172.31.110.20/24 dev s1
+	ip netns exec server ip link set s1 up
+
+	# Add forwarding for ARP and ip packets - completely wildcarded
+	ovs_add_flow "test_connect_v4" cv4 \
+		'in_port(1),eth(),eth_type(0x0806),arp()' '2' || return 1
+	ovs_add_flow "test_connect_v4" cv4 \
+		'in_port(2),eth(),eth_type(0x0806),arp()' '1' || return 1
+	ovs_add_flow "test_connect_v4" cv4 \
+		'in_port(1),eth(),eth_type(0x0800),ipv4(src=172.31.110.10)' '2' || return 1
+	ovs_add_flow "test_connect_v4" cv4 \
+		'in_port(2),eth(),eth_type(0x0800),ipv4(src=172.31.110.20)' '1' || return 1
+
+	# do a ping
+	ovs_sbx "test_connect_v4" ip netns exec client ping 172.31.110.20 -c 3 || return 1
+
+	info "done..."
+	return 0
+}
+
+# nat_connect_v4 test
+#  - client has 1500 byte MTU
+#  - server has 1500 byte MTU
+#  - use ICMP to ping in each direction
+#  - only allow CT state stuff to pass through new in c -> s
+test_nat_connect_v4 () {
+	which nc >/dev/null 2>/dev/null || return $ksft_skip
+
+	sbx_add "test_nat_connect_v4" || return $?
+
+	ovs_add_dp "test_nat_connect_v4" nat4 || return 1
+	info "create namespaces"
+	for ns in client server; do
+		ovs_add_netns_and_veths "test_nat_connect_v4" "nat4" "$ns" \
+		    "${ns:0:1}0" "${ns:0:1}1" || return 1
+	done
+
+	ip netns exec client ip addr add 172.31.110.10/24 dev c1
+	ip netns exec client ip link set c1 up
+	ip netns exec server ip addr add 172.31.110.20/24 dev s1
+	ip netns exec server ip link set s1 up
+
+	ip netns exec client ip route add default via 172.31.110.20
+
+	ovs_add_flow "test_nat_connect_v4" nat4 \
+		'in_port(1),eth(),eth_type(0x0806),arp()' '2' || return 1
+	ovs_add_flow "test_nat_connect_v4" nat4 \
+		'in_port(2),eth(),eth_type(0x0806),arp()' '1' || return 1
+	ovs_add_flow "test_nat_connect_v4" nat4 \
+		"ct_state(-trk),in_port(1),eth(),eth_type(0x0800),ipv4(dst=192.168.0.20)" \
+		"ct(commit,nat(dst=172.31.110.20)),recirc(0x1)"
+	ovs_add_flow "test_nat_connect_v4" nat4 \
+		"ct_state(-trk),in_port(2),eth(),eth_type(0x0800),ipv4()" \
+		"ct(commit,nat),recirc(0x2)"
+
+	ovs_add_flow "test_nat_connect_v4" nat4 \
+		"recirc_id(0x1),ct_state(+trk-inv),in_port(1),eth(),eth_type(0x0800),ipv4()" "2"
+	ovs_add_flow "test_nat_connect_v4" nat4 \
+		"recirc_id(0x2),ct_state(+trk-inv),in_port(2),eth(),eth_type(0x0800),ipv4()" "1"
+
+	# do a ping
+	ovs_sbx "test_nat_connect_v4" ip netns exec client ping 192.168.0.20 -c 3 || return 1
+
+	# create an echo server in 'server'
+	echo "server" | \
+		ovs_netns_spawn_daemon "test_nat_connect_v4" "server" \
+				nc -lvnp 4443
+	ovs_sbx "test_nat_connect_v4" ip netns exec client nc -i 1 -zv 192.168.0.20 4443 || return 1
+
+	# Now test in the other direction (should fail)
+	echo "client" | \
+		ovs_netns_spawn_daemon "test_nat_connect_v4" "client" \
+				nc -lvnp 4443
+	ovs_sbx "test_nat_connect_v4" ip netns exec client nc -i 1 -zv 172.31.110.10 4443
+	if [ $? == 0 ]; then
+	   info "connect to client was successful"
+	   return 1
+	fi
+
+	info "done..."
+	return 0
+}
+
+# nat_related_v4 test
+#  - client->server ip packets go via SNAT
+#  - client solicits ICMP destination unreachable packet from server
+#  - undo NAT for ICMP reply and test dst ip has been updated
+test_nat_related_v4 () {
+	which nc >/dev/null 2>/dev/null || return $ksft_skip
+
+	sbx_add "test_nat_related_v4" || return $?
+
+	ovs_add_dp "test_nat_related_v4" natrelated4 || return 1
+	info "create namespaces"
+	for ns in client server; do
+		ovs_add_netns_and_veths "test_nat_related_v4" "natrelated4" "$ns" \
+			"${ns:0:1}0" "${ns:0:1}1" || return 1
+	done
+
+	ip netns exec client ip addr add 172.31.110.10/24 dev c1
+	ip netns exec client ip link set c1 up
+	ip netns exec server ip addr add 172.31.110.20/24 dev s1
+	ip netns exec server ip link set s1 up
+
+	ip netns exec server ip route add 192.168.0.20/32 via 172.31.110.10
+
+	# Allow ARP
+	ovs_add_flow "test_nat_related_v4" natrelated4 \
+		"in_port(1),eth(),eth_type(0x0806),arp()" "2" || return 1
+	ovs_add_flow "test_nat_related_v4" natrelated4 \
+		"in_port(2),eth(),eth_type(0x0806),arp()" "1" || return 1
+
+	# Allow IP traffic from client->server, rewrite source IP with SNAT to 192.168.0.20
+	ovs_add_flow "test_nat_related_v4" natrelated4 \
+		"ct_state(-trk),in_port(1),eth(),eth_type(0x0800),ipv4(dst=172.31.110.20)" \
+		"ct(commit,nat(src=192.168.0.20)),recirc(0x1)" || return 1
+	ovs_add_flow "test_nat_related_v4" natrelated4 \
+		"recirc_id(0x1),ct_state(+trk-inv),in_port(1),eth(),eth_type(0x0800),ipv4()" \
+		"2" || return 1
+
+	# Allow related ICMP responses back from server and undo NAT to restore original IP
+	# Drop any ICMP related packets where dst ip hasn't been restored back to original IP
+	ovs_add_flow "test_nat_related_v4" natrelated4 \
+		"ct_state(-trk),in_port(2),eth(),eth_type(0x0800),ipv4()" \
+		"ct(commit,nat),recirc(0x2)" || return 1
+	ovs_add_flow "test_nat_related_v4" natrelated4 \
+		"recirc_id(0x2),ct_state(+rel+trk),in_port(2),eth(),eth_type(0x0800),ipv4(src=172.31.110.20,dst=172.31.110.10,proto=1),icmp()" \
+		"1" || return 1
+	ovs_add_flow "test_nat_related_v4" natrelated4 \
+		"recirc_id(0x2),ct_state(+rel+trk),in_port(2),eth(),eth_type(0x0800),ipv4(dst=192.168.0.20,proto=1),icmp()" \
+		"drop" || return 1
+
+	# Solicit destination unreachable response from server
+	ovs_sbx "test_nat_related_v4" ip netns exec client \
+		bash -c "echo a | nc -u -w 1 172.31.110.20 10000"
+
+	# Check to make sure no packets matched the drop rule with incorrect dst ip
+	python3 "$ovs_base/ovs-dpctl.py" dump-flows natrelated4 \
+		| grep "drop" | grep "packets:0" >/dev/null || return 1
+
+	info "done..."
+	return 0
+}
+
+# netlink_validation
+# - Create a dp
+# - check no warning with "old version" simulation
+test_netlink_checks () {
+	sbx_add "test_netlink_checks" || return 1
+
+	info "setting up new DP"
+	ovs_add_dp "test_netlink_checks" nv0 || return 1
+	# now try again
+	PRE_TEST=$(dmesg | grep -E "RIP: [0-9a-fA-Fx]+:ovs_dp_cmd_new\+")
+	ovs_add_dp "test_netlink_checks" nv0 -V 0 || return 1
+	POST_TEST=$(dmesg | grep -E "RIP: [0-9a-fA-Fx]+:ovs_dp_cmd_new\+")
+	if [ "$PRE_TEST" != "$POST_TEST" ]; then
+		info "failed - gen warning"
+		return 1
+	fi
+
+	ovs_add_netns_and_veths "test_netlink_checks" nv0 left left0 l0 || \
+	    return 1
+	ovs_add_netns_and_veths "test_netlink_checks" nv0 right right0 r0 || \
+	    return 1
+	[ $(python3 $ovs_base/ovs-dpctl.py show nv0 | grep port | \
+	    wc -l) == 3 ] || \
+	      return 1
+	ovs_del_if "test_netlink_checks" nv0 right0 || return 1
+	[ $(python3 $ovs_base/ovs-dpctl.py show nv0 | grep port | \
+	    wc -l) == 2 ] || \
+	      return 1
+
+	info "Checking clone depth"
+	ERR_MSG="Flow actions may not be safe on all matching packets"
+	PRE_TEST=$(dmesg | grep -c "${ERR_MSG}")
+	ovs_add_flow "test_netlink_checks" nv0 \
+		'in_port(1),eth(),eth_type(0x800),ipv4()' \
+		'clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(drop)))))))))))))))))' \
+		>/dev/null 2>&1 && return 1
+	POST_TEST=$(dmesg | grep -c "${ERR_MSG}")
+
+	if [ "$PRE_TEST" == "$POST_TEST" ]; then
+		info "failed - clone depth too large"
+		return 1
+	fi
+
+	PRE_TEST=$(dmesg | grep -c "${ERR_MSG}")
+	ovs_add_flow "test_netlink_checks" nv0 \
+		'in_port(1),eth(),eth_type(0x0806),arp()' 'drop(0),2' \
+		&> /dev/null && return 1
+	POST_TEST=$(dmesg | grep -c "${ERR_MSG}")
+	if [ "$PRE_TEST" == "$POST_TEST" ]; then
+		info "failed - error not generated"
+		return 1
+	fi
+	return 0
+}
+
+test_upcall_interfaces() {
+	sbx_add "test_upcall_interfaces" || return 1
+
+	info "setting up new DP"
+	ovs_add_dp "test_upcall_interfaces" ui0 -V 2:1 || return 1
+
+	ovs_add_netns_and_veths "test_upcall_interfaces" ui0 upc left0 l0 \
+	    172.31.110.1/24 -u || return 1
+
+	sleep 1
+	info "sending arping"
+	ip netns exec upc arping -I l0 172.31.110.20 -c 1 \
+	    >$ovs_dir/arping.stdout 2>$ovs_dir/arping.stderr
+
+	grep -E "MISS upcall\[0/yes\]: .*arp\(sip=172.31.110.1,tip=172.31.110.20,op=1,sha=" $ovs_dir/left0.out >/dev/null 2>&1 || return 1
+	return 0
+}
+
+run_test() {
+	(
+	tname="$1"
+	tdesc="$2"
+
+	if ! lsmod | grep openvswitch >/dev/null 2>&1; then
+		stdbuf -o0 printf "TEST: %-60s  [NOMOD]\n" "${tdesc}"
+		return $ksft_skip
+	fi
+
+	if python3 ovs-dpctl.py -h 2>&1 | \
+	     grep -E "Need to (install|upgrade) the python" >/dev/null 2>&1; then
+		stdbuf -o0 printf "TEST: %-60s  [PYLIB]\n" "${tdesc}"
+		return $ksft_skip
+	fi
+	printf "TEST: %-60s  [START]\n" "${tname}"
+
+	unset IFS
+
+	eval test_${tname}
+	ret=$?
+
+	if [ $ret -eq 0 ]; then
+		printf "TEST: %-60s  [ OK ]\n" "${tdesc}"
+		ovs_exit_sig
+		rm -rf "$ovs_dir"
+	elif [ $ret -eq 1 ]; then
+		printf "TEST: %-60s  [FAIL]\n" "${tdesc}"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+			echo
+			echo "Pausing. Logs in $ovs_dir/. Hit enter to continue"
+			read a
+		fi
+		ovs_exit_sig
+		[ "${PAUSE_ON_FAIL}" = "yes" ] || rm -rf "$ovs_dir"
+		exit 1
+	elif [ $ret -eq $ksft_skip ]; then
+		printf "TEST: %-60s  [SKIP]\n" "${tdesc}"
+	elif [ $ret -eq 2 ]; then
+		rm -rf test_${tname}
+		run_test "$1" "$2"
+	fi
+
+	return $ret
+	)
+	ret=$?
+	case $ret in
+		0)
+			[ $all_skipped = true ] && [ $exitcode=$ksft_skip ] && exitcode=0
+			all_skipped=false
+		;;
+		$ksft_skip)
+			[ $all_skipped = true ] && exitcode=$ksft_skip
+		;;
+		*)
+			all_skipped=false
+			exitcode=1
+		;;
+	esac
+
+	return $ret
+}
+
+
+exitcode=0
+desc=0
+all_skipped=true
+
+while getopts :pvt o
+do
+	case $o in
+	p) PAUSE_ON_FAIL=yes;;
+	v) VERBOSE=1;;
+	t) if which tcpdump > /dev/null 2>&1; then
+		TRACING=1
+	   else
+		echo "=== tcpdump not available, tracing disabled"
+	   fi
+	   ;;
+	*) usage;;
+	esac
+done
+shift $(($OPTIND-1))
+
+IFS="	
+"
+
+for arg do
+	# Check first that all requested tests are available before running any
+	command -v > /dev/null "test_${arg}" || { echo "=== Test ${arg} not found"; usage; }
+done
+
+name=""
+desc=""
+for t in ${tests}; do
+	[ "${name}" = "" ]	&& name="${t}"	&& continue
+	[ "${desc}" = "" ]	&& desc="${t}"
+
+	run_this=1
+	for arg do
+		[ "${arg}" != "${arg#--*}" ] && continue
+		[ "${arg}" = "${name}" ] && run_this=1 && break
+		run_this=0
+	done
+	if [ $run_this -eq 1 ]; then
+		run_test "${name}" "${desc}"
+	fi
+	name=""
+	desc=""
+done
+
+exit ${exitcode}
diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
new file mode 100644
index 000000000000..5e0e539a323d
--- /dev/null
+++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
@@ -0,0 +1,2236 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+# Controls the openvswitch module.  Part of the kselftest suite, but
+# can be used for some diagnostic purpose as well.
+
+import argparse
+import errno
+import ipaddress
+import logging
+import multiprocessing
+import re
+import struct
+import sys
+import time
+import types
+import uuid
+
+try:
+    from pyroute2 import NDB
+
+    from pyroute2.netlink import NLA_F_NESTED
+    from pyroute2.netlink import NLM_F_ACK
+    from pyroute2.netlink import NLM_F_DUMP
+    from pyroute2.netlink import NLM_F_REQUEST
+    from pyroute2.netlink import genlmsg
+    from pyroute2.netlink import nla
+    from pyroute2.netlink import nlmsg_atoms
+    from pyroute2.netlink.exceptions import NetlinkError
+    from pyroute2.netlink.generic import GenericNetlinkSocket
+    import pyroute2
+
+except ModuleNotFoundError:
+    print("Need to install the python pyroute2 package >= 0.6.")
+    sys.exit(0)
+
+
+OVS_DATAPATH_FAMILY = "ovs_datapath"
+OVS_VPORT_FAMILY = "ovs_vport"
+OVS_FLOW_FAMILY = "ovs_flow"
+OVS_PACKET_FAMILY = "ovs_packet"
+OVS_METER_FAMILY = "ovs_meter"
+OVS_CT_LIMIT_FAMILY = "ovs_ct_limit"
+
+OVS_DATAPATH_VERSION = 2
+OVS_DP_CMD_NEW = 1
+OVS_DP_CMD_DEL = 2
+OVS_DP_CMD_GET = 3
+OVS_DP_CMD_SET = 4
+
+OVS_VPORT_CMD_NEW = 1
+OVS_VPORT_CMD_DEL = 2
+OVS_VPORT_CMD_GET = 3
+OVS_VPORT_CMD_SET = 4
+
+OVS_FLOW_CMD_NEW = 1
+OVS_FLOW_CMD_DEL = 2
+OVS_FLOW_CMD_GET = 3
+OVS_FLOW_CMD_SET = 4
+
+
+def macstr(mac):
+    outstr = ":".join(["%02X" % i for i in mac])
+    return outstr
+
+
+def strcspn(str1, str2):
+    tot = 0
+    for char in str1:
+        if str2.find(char) != -1:
+            return tot
+        tot += 1
+    return tot
+
+
+def strspn(str1, str2):
+    tot = 0
+    for char in str1:
+        if str2.find(char) == -1:
+            return tot
+        tot += 1
+    return tot
+
+
+def intparse(statestr, defmask="0xffffffff"):
+    totalparse = strspn(statestr, "0123456789abcdefABCDEFx/")
+    # scan until "/"
+    count = strspn(statestr, "x0123456789abcdefABCDEF")
+
+    firstnum = statestr[:count]
+    if firstnum[-1] == "/":
+        firstnum = firstnum[:-1]
+    k = int(firstnum, 0)
+
+    m = None
+    if defmask is not None:
+        secondnum = defmask
+        if statestr[count] == "/":
+            secondnum = statestr[count + 1 :]  # this is wrong...
+        m = int(secondnum, 0)
+
+    return statestr[totalparse + 1 :], k, m
+
+
+def parse_flags(flag_str, flag_vals):
+    bitResult = 0
+    maskResult = 0
+
+    if len(flag_str) == 0:
+        return flag_str, bitResult, maskResult
+
+    if flag_str[0].isdigit():
+        idx = 0
+        while flag_str[idx].isdigit() or flag_str[idx] == "x":
+            idx += 1
+        digits = flag_str[:idx]
+        flag_str = flag_str[idx:]
+
+        bitResult = int(digits, 0)
+        maskResult = int(digits, 0)
+
+    while len(flag_str) > 0 and (flag_str[0] == "+" or flag_str[0] == "-"):
+        if flag_str[0] == "+":
+            setFlag = True
+        elif flag_str[0] == "-":
+            setFlag = False
+
+        flag_str = flag_str[1:]
+
+        flag_len = 0
+        while (
+            flag_str[flag_len] != "+"
+            and flag_str[flag_len] != "-"
+            and flag_str[flag_len] != ","
+            and flag_str[flag_len] != ")"
+        ):
+            flag_len += 1
+
+        flag = flag_str[0:flag_len]
+
+        if flag in flag_vals:
+            if maskResult & flag_vals[flag]:
+                raise KeyError(
+                    "Flag %s set once, cannot be set in multiples" % flag
+                )
+
+            if setFlag:
+                bitResult |= flag_vals[flag]
+
+            maskResult |= flag_vals[flag]
+        else:
+            raise KeyError("Missing flag value: %s" % flag)
+
+        flag_str = flag_str[flag_len:]
+
+    return flag_str, bitResult, maskResult
+
+
+def parse_ct_state(statestr):
+    ct_flags = {
+        "new": 1 << 0,
+        "est": 1 << 1,
+        "rel": 1 << 2,
+        "rpl": 1 << 3,
+        "inv": 1 << 4,
+        "trk": 1 << 5,
+        "snat": 1 << 6,
+        "dnat": 1 << 7,
+    }
+
+    return parse_flags(statestr, ct_flags)
+
+
+def convert_mac(data):
+    def to_bytes(mac):
+        mac_split = mac.split(":")
+        ret = bytearray([int(i, 16) for i in mac_split])
+        return bytes(ret)
+
+    mac_str, _, mask_str = data.partition('/')
+
+    if not mac_str:
+        mac_str = mask_str = "00:00:00:00:00:00"
+    elif not mask_str:
+        mask_str = "FF:FF:FF:FF:FF:FF"
+
+    return to_bytes(mac_str), to_bytes(mask_str)
+
+def convert_ipv4(data):
+    ip, _, mask = data.partition('/')
+
+    if not ip:
+        ip = mask = 0
+    elif not mask:
+        mask = 0xFFFFFFFF
+    elif mask.isdigit():
+        mask = (0xFFFFFFFF << (32 - int(mask))) & 0xFFFFFFFF
+
+    return int(ipaddress.IPv4Address(ip)), int(ipaddress.IPv4Address(mask))
+
+def convert_int(size):
+    def convert_int_sized(data):
+        value, _, mask = data.partition('/')
+
+        if not value:
+            return 0, 0
+        elif not mask:
+            return int(value, 0), pow(2, size) - 1
+        else:
+            return int(value, 0), int(mask, 0)
+
+    return convert_int_sized
+
+def parse_starts_block(block_str, scanstr, returnskipped, scanregex=False):
+    if scanregex:
+        m = re.search(scanstr, block_str)
+        if m is None:
+            if returnskipped:
+                return block_str
+            return False
+        if returnskipped:
+            block_str = block_str[len(m.group(0)) :]
+            return block_str
+        return True
+
+    if block_str.startswith(scanstr):
+        if returnskipped:
+            block_str = block_str[len(scanstr) :]
+        else:
+            return True
+
+    if returnskipped:
+        return block_str
+
+    return False
+
+
+def parse_extract_field(
+    block_str, fieldstr, scanfmt, convert, masked=False, defval=None
+):
+    if fieldstr and not block_str.startswith(fieldstr):
+        return block_str, defval
+
+    if fieldstr:
+        str_skiplen = len(fieldstr)
+        str_skipped = block_str[str_skiplen:]
+        if str_skiplen == 0:
+            return str_skipped, defval
+    else:
+        str_skiplen = 0
+        str_skipped = block_str
+
+    m = re.search(scanfmt, str_skipped)
+    if m is None:
+        raise ValueError("Bad fmt string")
+
+    data = m.group(0)
+    if convert:
+        data = convert(m.group(0))
+
+    str_skipped = str_skipped[len(m.group(0)) :]
+    if masked:
+        if str_skipped[0] == "/":
+            raise ValueError("Masking support TBD...")
+
+    str_skipped = str_skipped[strspn(str_skipped, ", ") :]
+    return str_skipped, data
+
+
+class ovs_dp_msg(genlmsg):
+    # include the OVS version
+    # We need a custom header rather than just being able to rely on
+    # genlmsg because fields ends up not expressing everything correctly
+    # if we use the canonical example of setting fields = (('customfield',),)
+    fields = genlmsg.fields + (("dpifindex", "I"),)
+
+
+class ovsactions(nla):
+    nla_flags = NLA_F_NESTED
+
+    nla_map = (
+        ("OVS_ACTION_ATTR_UNSPEC", "none"),
+        ("OVS_ACTION_ATTR_OUTPUT", "uint32"),
+        ("OVS_ACTION_ATTR_USERSPACE", "userspace"),
+        ("OVS_ACTION_ATTR_SET", "none"),
+        ("OVS_ACTION_ATTR_PUSH_VLAN", "none"),
+        ("OVS_ACTION_ATTR_POP_VLAN", "flag"),
+        ("OVS_ACTION_ATTR_SAMPLE", "none"),
+        ("OVS_ACTION_ATTR_RECIRC", "uint32"),
+        ("OVS_ACTION_ATTR_HASH", "none"),
+        ("OVS_ACTION_ATTR_PUSH_MPLS", "none"),
+        ("OVS_ACTION_ATTR_POP_MPLS", "flag"),
+        ("OVS_ACTION_ATTR_SET_MASKED", "none"),
+        ("OVS_ACTION_ATTR_CT", "ctact"),
+        ("OVS_ACTION_ATTR_TRUNC", "uint32"),
+        ("OVS_ACTION_ATTR_PUSH_ETH", "none"),
+        ("OVS_ACTION_ATTR_POP_ETH", "flag"),
+        ("OVS_ACTION_ATTR_CT_CLEAR", "flag"),
+        ("OVS_ACTION_ATTR_PUSH_NSH", "none"),
+        ("OVS_ACTION_ATTR_POP_NSH", "flag"),
+        ("OVS_ACTION_ATTR_METER", "none"),
+        ("OVS_ACTION_ATTR_CLONE", "recursive"),
+        ("OVS_ACTION_ATTR_CHECK_PKT_LEN", "none"),
+        ("OVS_ACTION_ATTR_ADD_MPLS", "none"),
+        ("OVS_ACTION_ATTR_DEC_TTL", "none"),
+        ("OVS_ACTION_ATTR_DROP", "uint32"),
+    )
+
+    class ctact(nla):
+        nla_flags = NLA_F_NESTED
+
+        nla_map = (
+            ("OVS_CT_ATTR_NONE", "none"),
+            ("OVS_CT_ATTR_COMMIT", "flag"),
+            ("OVS_CT_ATTR_ZONE", "uint16"),
+            ("OVS_CT_ATTR_MARK", "none"),
+            ("OVS_CT_ATTR_LABELS", "none"),
+            ("OVS_CT_ATTR_HELPER", "asciiz"),
+            ("OVS_CT_ATTR_NAT", "natattr"),
+            ("OVS_CT_ATTR_FORCE_COMMIT", "flag"),
+            ("OVS_CT_ATTR_EVENTMASK", "uint32"),
+            ("OVS_CT_ATTR_TIMEOUT", "asciiz"),
+        )
+
+        class natattr(nla):
+            nla_flags = NLA_F_NESTED
+
+            nla_map = (
+                ("OVS_NAT_ATTR_NONE", "none"),
+                ("OVS_NAT_ATTR_SRC", "flag"),
+                ("OVS_NAT_ATTR_DST", "flag"),
+                ("OVS_NAT_ATTR_IP_MIN", "ipaddr"),
+                ("OVS_NAT_ATTR_IP_MAX", "ipaddr"),
+                ("OVS_NAT_ATTR_PROTO_MIN", "uint16"),
+                ("OVS_NAT_ATTR_PROTO_MAX", "uint16"),
+                ("OVS_NAT_ATTR_PERSISTENT", "flag"),
+                ("OVS_NAT_ATTR_PROTO_HASH", "flag"),
+                ("OVS_NAT_ATTR_PROTO_RANDOM", "flag"),
+            )
+
+            def dpstr(self, more=False):
+                print_str = "nat("
+
+                if self.get_attr("OVS_NAT_ATTR_SRC"):
+                    print_str += "src"
+                elif self.get_attr("OVS_NAT_ATTR_DST"):
+                    print_str += "dst"
+                else:
+                    print_str += "XXX-unknown-nat"
+
+                if self.get_attr("OVS_NAT_ATTR_IP_MIN") or self.get_attr(
+                    "OVS_NAT_ATTR_IP_MAX"
+                ):
+                    if self.get_attr("OVS_NAT_ATTR_IP_MIN"):
+                        print_str += "=%s," % str(
+                            self.get_attr("OVS_NAT_ATTR_IP_MIN")
+                        )
+
+                    if self.get_attr("OVS_NAT_ATTR_IP_MAX"):
+                        print_str += "-%s," % str(
+                            self.get_attr("OVS_NAT_ATTR_IP_MAX")
+                        )
+                else:
+                    print_str += ","
+
+                if self.get_attr("OVS_NAT_ATTR_PROTO_MIN"):
+                    print_str += "proto_min=%d," % self.get_attr(
+                        "OVS_NAT_ATTR_PROTO_MIN"
+                    )
+
+                if self.get_attr("OVS_NAT_ATTR_PROTO_MAX"):
+                    print_str += "proto_max=%d," % self.get_attr(
+                        "OVS_NAT_ATTR_PROTO_MAX"
+                    )
+
+                if self.get_attr("OVS_NAT_ATTR_PERSISTENT"):
+                    print_str += "persistent,"
+                if self.get_attr("OVS_NAT_ATTR_HASH"):
+                    print_str += "hash,"
+                if self.get_attr("OVS_NAT_ATTR_RANDOM"):
+                    print_str += "random"
+                print_str += ")"
+                return print_str
+
+        def dpstr(self, more=False):
+            print_str = "ct("
+
+            if self.get_attr("OVS_CT_ATTR_COMMIT") is not None:
+                print_str += "commit,"
+            if self.get_attr("OVS_CT_ATTR_ZONE") is not None:
+                print_str += "zone=%d," % self.get_attr("OVS_CT_ATTR_ZONE")
+            if self.get_attr("OVS_CT_ATTR_HELPER") is not None:
+                print_str += "helper=%s," % self.get_attr("OVS_CT_ATTR_HELPER")
+            if self.get_attr("OVS_CT_ATTR_NAT") is not None:
+                print_str += self.get_attr("OVS_CT_ATTR_NAT").dpstr(more)
+                print_str += ","
+            if self.get_attr("OVS_CT_ATTR_FORCE_COMMIT") is not None:
+                print_str += "force,"
+            if self.get_attr("OVS_CT_ATTR_EVENTMASK") is not None:
+                print_str += "emask=0x%X," % self.get_attr(
+                    "OVS_CT_ATTR_EVENTMASK"
+                )
+            if self.get_attr("OVS_CT_ATTR_TIMEOUT") is not None:
+                print_str += "timeout=%s" % self.get_attr(
+                    "OVS_CT_ATTR_TIMEOUT"
+                )
+            print_str += ")"
+            return print_str
+
+    class userspace(nla):
+        nla_flags = NLA_F_NESTED
+
+        nla_map = (
+            ("OVS_USERSPACE_ATTR_UNUSED", "none"),
+            ("OVS_USERSPACE_ATTR_PID", "uint32"),
+            ("OVS_USERSPACE_ATTR_USERDATA", "array(uint8)"),
+            ("OVS_USERSPACE_ATTR_EGRESS_TUN_PORT", "uint32"),
+        )
+
+        def dpstr(self, more=False):
+            print_str = "userspace("
+            if self.get_attr("OVS_USERSPACE_ATTR_PID") is not None:
+                print_str += "pid=%d," % self.get_attr(
+                    "OVS_USERSPACE_ATTR_PID"
+                )
+            if self.get_attr("OVS_USERSPACE_ATTR_USERDATA") is not None:
+                print_str += "userdata="
+                for f in self.get_attr("OVS_USERSPACE_ATTR_USERDATA"):
+                    print_str += "%x." % f
+            if self.get_attr("OVS_USERSPACE_ATTR_TUN_PORT") is not None:
+                print_str += "egress_tun_port=%d" % self.get_attr(
+                    "OVS_USERSPACE_ATTR_TUN_PORT"
+                )
+            print_str += ")"
+            return print_str
+
+    def dpstr(self, more=False):
+        print_str = ""
+
+        for field in self.nla_map:
+            if field[1] == "none" or self.get_attr(field[0]) is None:
+                continue
+            if print_str != "":
+                print_str += ","
+
+            if field[1] == "uint32":
+                if field[0] == "OVS_ACTION_ATTR_OUTPUT":
+                    print_str += "%d" % int(self.get_attr(field[0]))
+                elif field[0] == "OVS_ACTION_ATTR_RECIRC":
+                    print_str += "recirc(0x%x)" % int(self.get_attr(field[0]))
+                elif field[0] == "OVS_ACTION_ATTR_TRUNC":
+                    print_str += "trunc(%d)" % int(self.get_attr(field[0]))
+                elif field[0] == "OVS_ACTION_ATTR_DROP":
+                    print_str += "drop(%d)" % int(self.get_attr(field[0]))
+            elif field[1] == "flag":
+                if field[0] == "OVS_ACTION_ATTR_CT_CLEAR":
+                    print_str += "ct_clear"
+                elif field[0] == "OVS_ACTION_ATTR_POP_VLAN":
+                    print_str += "pop_vlan"
+                elif field[0] == "OVS_ACTION_ATTR_POP_ETH":
+                    print_str += "pop_eth"
+                elif field[0] == "OVS_ACTION_ATTR_POP_NSH":
+                    print_str += "pop_nsh"
+                elif field[0] == "OVS_ACTION_ATTR_POP_MPLS":
+                    print_str += "pop_mpls"
+            else:
+                datum = self.get_attr(field[0])
+                if field[0] == "OVS_ACTION_ATTR_CLONE":
+                    print_str += "clone("
+                    print_str += datum.dpstr(more)
+                    print_str += ")"
+                else:
+                    print_str += datum.dpstr(more)
+
+        return print_str
+
+    def parse(self, actstr):
+        totallen = len(actstr)
+        while len(actstr) != 0:
+            parsed = False
+            parencount = 0
+            if actstr.startswith("drop"):
+                # If no reason is provided, the implicit drop is used (i.e no
+                # action). If some reason is given, an explicit action is used.
+                reason = None
+                if actstr.startswith("drop("):
+                    parencount += 1
+
+                    actstr, reason = parse_extract_field(
+                        actstr,
+                        "drop(",
+                        "([0-9]+)",
+                        lambda x: int(x, 0),
+                        False,
+                        None,
+                    )
+
+                if reason is not None:
+                    self["attrs"].append(["OVS_ACTION_ATTR_DROP", reason])
+                    parsed = True
+                else:
+                    actstr = actstr[len("drop"): ]
+                    return (totallen - len(actstr))
+
+            elif parse_starts_block(actstr, "^(\d+)", False, True):
+                actstr, output = parse_extract_field(
+                    actstr, None, "(\d+)", lambda x: int(x), False, "0"
+                )
+                self["attrs"].append(["OVS_ACTION_ATTR_OUTPUT", output])
+                parsed = True
+            elif parse_starts_block(actstr, "recirc(", False):
+                actstr, recircid = parse_extract_field(
+                    actstr,
+                    "recirc(",
+                    "([0-9a-fA-Fx]+)",
+                    lambda x: int(x, 0),
+                    False,
+                    0,
+                )
+                parencount += 1
+                self["attrs"].append(["OVS_ACTION_ATTR_RECIRC", recircid])
+                parsed = True
+
+            parse_flat_map = (
+                ("ct_clear", "OVS_ACTION_ATTR_CT_CLEAR"),
+                ("pop_vlan", "OVS_ACTION_ATTR_POP_VLAN"),
+                ("pop_eth", "OVS_ACTION_ATTR_POP_ETH"),
+                ("pop_nsh", "OVS_ACTION_ATTR_POP_NSH"),
+            )
+
+            for flat_act in parse_flat_map:
+                if parse_starts_block(actstr, flat_act[0], False):
+                    actstr = actstr[len(flat_act[0]):]
+                    self["attrs"].append([flat_act[1]])
+                    actstr = actstr[strspn(actstr, ", ") :]
+                    parsed = True
+
+            if parse_starts_block(actstr, "clone(", False):
+                parencount += 1
+                subacts = ovsactions()
+                actstr = actstr[len("clone("):]
+                parsedLen = subacts.parse(actstr)
+                lst = []
+                self["attrs"].append(("OVS_ACTION_ATTR_CLONE", subacts))
+                actstr = actstr[parsedLen:]
+                parsed = True
+            elif parse_starts_block(actstr, "ct(", False):
+                parencount += 1
+                actstr = actstr[len("ct(") :]
+                ctact = ovsactions.ctact()
+
+                for scan in (
+                    ("commit", "OVS_CT_ATTR_COMMIT", None),
+                    ("force_commit", "OVS_CT_ATTR_FORCE_COMMIT", None),
+                    ("zone", "OVS_CT_ATTR_ZONE", int),
+                    ("mark", "OVS_CT_ATTR_MARK", int),
+                    ("helper", "OVS_CT_ATTR_HELPER", lambda x, y: str(x)),
+                    ("timeout", "OVS_CT_ATTR_TIMEOUT", lambda x, y: str(x)),
+                ):
+                    if actstr.startswith(scan[0]):
+                        actstr = actstr[len(scan[0]) :]
+                        if scan[2] is not None:
+                            if actstr[0] != "=":
+                                raise ValueError("Invalid ct attr")
+                            actstr = actstr[1:]
+                            pos = strcspn(actstr, ",)")
+                            datum = scan[2](actstr[:pos], 0)
+                            ctact["attrs"].append([scan[1], datum])
+                            actstr = actstr[pos:]
+                        else:
+                            ctact["attrs"].append([scan[1], None])
+                        actstr = actstr[strspn(actstr, ", ") :]
+                    # it seems strange to put this here, but nat() is a complex
+                    # sub-action and this lets it sit anywhere in the ct() action
+                    if actstr.startswith("nat"):
+                        actstr = actstr[3:]
+                        natact = ovsactions.ctact.natattr()
+
+                        if actstr.startswith("("):
+                            parencount += 1
+                            t = None
+                            actstr = actstr[1:]
+                            if actstr.startswith("src"):
+                                t = "OVS_NAT_ATTR_SRC"
+                                actstr = actstr[3:]
+                            elif actstr.startswith("dst"):
+                                t = "OVS_NAT_ATTR_DST"
+                                actstr = actstr[3:]
+
+                            actstr, ip_block_min = parse_extract_field(
+                                actstr, "=", "([0-9a-fA-F\.]+)", str, False
+                            )
+                            actstr, ip_block_max = parse_extract_field(
+                                actstr, "-", "([0-9a-fA-F\.]+)", str, False
+                            )
+
+                            actstr, proto_min = parse_extract_field(
+                                actstr, ":", "(\d+)", int, False
+                            )
+                            actstr, proto_max = parse_extract_field(
+                                actstr, "-", "(\d+)", int, False
+                            )
+
+                            if t is not None:
+                                natact["attrs"].append([t, None])
+
+                                if ip_block_min is not None:
+                                    natact["attrs"].append(
+                                        ["OVS_NAT_ATTR_IP_MIN", ip_block_min]
+                                    )
+                                if ip_block_max is not None:
+                                    natact["attrs"].append(
+                                        ["OVS_NAT_ATTR_IP_MAX", ip_block_max]
+                                    )
+                                if proto_min is not None:
+                                    natact["attrs"].append(
+                                        ["OVS_NAT_ATTR_PROTO_MIN", proto_min]
+                                    )
+                                if proto_max is not None:
+                                    natact["attrs"].append(
+                                        ["OVS_NAT_ATTR_PROTO_MAX", proto_max]
+                                    )
+
+                            for natscan in (
+                                ("persistent", "OVS_NAT_ATTR_PERSISTENT"),
+                                ("hash", "OVS_NAT_ATTR_PROTO_HASH"),
+                                ("random", "OVS_NAT_ATTR_PROTO_RANDOM"),
+                            ):
+                                if actstr.startswith(natscan[0]):
+                                    actstr = actstr[len(natscan[0]) :]
+                                    natact["attrs"].append([natscan[1], None])
+                                    actstr = actstr[strspn(actstr, ", ") :]
+
+                        ctact["attrs"].append(["OVS_CT_ATTR_NAT", natact])
+                        actstr = actstr[strspn(actstr, ", ") :]
+
+                self["attrs"].append(["OVS_ACTION_ATTR_CT", ctact])
+                parsed = True
+
+            actstr = actstr[strspn(actstr, ", ") :]
+            while parencount > 0:
+                parencount -= 1
+                actstr = actstr[strspn(actstr, " "):]
+                if len(actstr) and actstr[0] != ")":
+                    raise ValueError("Action str: '%s' unbalanced" % actstr)
+                actstr = actstr[1:]
+
+            if len(actstr) and actstr[0] == ")":
+                return (totallen - len(actstr))
+
+            actstr = actstr[strspn(actstr, ", ") :]
+
+            if not parsed:
+                raise ValueError("Action str: '%s' not supported" % actstr)
+
+        return (totallen - len(actstr))
+
+
+class ovskey(nla):
+    nla_flags = NLA_F_NESTED
+    nla_map = (
+        ("OVS_KEY_ATTR_UNSPEC", "none"),
+        ("OVS_KEY_ATTR_ENCAP", "none"),
+        ("OVS_KEY_ATTR_PRIORITY", "uint32"),
+        ("OVS_KEY_ATTR_IN_PORT", "uint32"),
+        ("OVS_KEY_ATTR_ETHERNET", "ethaddr"),
+        ("OVS_KEY_ATTR_VLAN", "uint16"),
+        ("OVS_KEY_ATTR_ETHERTYPE", "be16"),
+        ("OVS_KEY_ATTR_IPV4", "ovs_key_ipv4"),
+        ("OVS_KEY_ATTR_IPV6", "ovs_key_ipv6"),
+        ("OVS_KEY_ATTR_TCP", "ovs_key_tcp"),
+        ("OVS_KEY_ATTR_UDP", "ovs_key_udp"),
+        ("OVS_KEY_ATTR_ICMP", "ovs_key_icmp"),
+        ("OVS_KEY_ATTR_ICMPV6", "ovs_key_icmpv6"),
+        ("OVS_KEY_ATTR_ARP", "ovs_key_arp"),
+        ("OVS_KEY_ATTR_ND", "ovs_key_nd"),
+        ("OVS_KEY_ATTR_SKB_MARK", "uint32"),
+        ("OVS_KEY_ATTR_TUNNEL", "none"),
+        ("OVS_KEY_ATTR_SCTP", "ovs_key_sctp"),
+        ("OVS_KEY_ATTR_TCP_FLAGS", "be16"),
+        ("OVS_KEY_ATTR_DP_HASH", "uint32"),
+        ("OVS_KEY_ATTR_RECIRC_ID", "uint32"),
+        ("OVS_KEY_ATTR_MPLS", "array(ovs_key_mpls)"),
+        ("OVS_KEY_ATTR_CT_STATE", "uint32"),
+        ("OVS_KEY_ATTR_CT_ZONE", "uint16"),
+        ("OVS_KEY_ATTR_CT_MARK", "uint32"),
+        ("OVS_KEY_ATTR_CT_LABELS", "none"),
+        ("OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4", "ovs_key_ct_tuple_ipv4"),
+        ("OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6", "ovs_key_ct_tuple_ipv6"),
+        ("OVS_KEY_ATTR_NSH", "none"),
+        ("OVS_KEY_ATTR_PACKET_TYPE", "none"),
+        ("OVS_KEY_ATTR_ND_EXTENSIONS", "none"),
+        ("OVS_KEY_ATTR_TUNNEL_INFO", "none"),
+        ("OVS_KEY_ATTR_IPV6_EXTENSIONS", "none"),
+    )
+
+    class ovs_key_proto(nla):
+        fields = (
+            ("src", "!H"),
+            ("dst", "!H"),
+        )
+
+        fields_map = (
+            ("src", "src", "%d", lambda x: int(x) if x else 0,
+                convert_int(16)),
+            ("dst", "dst", "%d", lambda x: int(x) if x else 0,
+                convert_int(16)),
+        )
+
+        def __init__(
+            self,
+            protostr,
+            data=None,
+            offset=None,
+            parent=None,
+            length=None,
+            init=None,
+        ):
+            self.proto_str = protostr
+            nla.__init__(
+                self,
+                data=data,
+                offset=offset,
+                parent=parent,
+                length=length,
+                init=init,
+            )
+
+        def parse(self, flowstr, typeInst):
+            if not flowstr.startswith(self.proto_str):
+                return None, None
+
+            k = typeInst()
+            m = typeInst()
+
+            flowstr = flowstr[len(self.proto_str) :]
+            if flowstr.startswith("("):
+                flowstr = flowstr[1:]
+
+            keybits = b""
+            maskbits = b""
+            for f in self.fields_map:
+                if flowstr.startswith(f[1]):
+                    # the following assumes that the field looks
+                    # something like 'field.' where '.' is a
+                    # character that we don't exactly care about.
+                    flowstr = flowstr[len(f[1]) + 1 :]
+                    splitchar = 0
+                    for c in flowstr:
+                        if c == "," or c == ")":
+                            break
+                        splitchar += 1
+                    data = flowstr[:splitchar]
+                    flowstr = flowstr[splitchar:]
+                else:
+                    data = ""
+
+                if len(f) > 4:
+                    k[f[0]], m[f[0]] = f[4](data)
+                else:
+                    k[f[0]] = f[3](data)
+                    m[f[0]] = f[3](data)
+
+                flowstr = flowstr[strspn(flowstr, ", ") :]
+                if len(flowstr) == 0:
+                    return flowstr, k, m
+
+            flowstr = flowstr[strspn(flowstr, "), ") :]
+
+            return flowstr, k, m
+
+        def dpstr(self, masked=None, more=False):
+            outstr = self.proto_str + "("
+            first = False
+            for f in self.fields_map:
+                if first:
+                    outstr += ","
+                if masked is None:
+                    outstr += "%s=" % f[0]
+                    if isinstance(f[2], str):
+                        outstr += f[2] % self[f[1]]
+                    else:
+                        outstr += f[2](self[f[1]])
+                    first = True
+                elif more or f[3](masked[f[1]]) != 0:
+                    outstr += "%s=" % f[0]
+                    if isinstance(f[2], str):
+                        outstr += f[2] % self[f[1]]
+                    else:
+                        outstr += f[2](self[f[1]])
+                    outstr += "/"
+                    if isinstance(f[2], str):
+                        outstr += f[2] % masked[f[1]]
+                    else:
+                        outstr += f[2](masked[f[1]])
+                    first = True
+            outstr += ")"
+            return outstr
+
+    class ethaddr(ovs_key_proto):
+        fields = (
+            ("src", "!6s"),
+            ("dst", "!6s"),
+        )
+
+        fields_map = (
+            (
+                "src",
+                "src",
+                macstr,
+                lambda x: int.from_bytes(x, "big"),
+                convert_mac,
+            ),
+            (
+                "dst",
+                "dst",
+                macstr,
+                lambda x: int.from_bytes(x, "big"),
+                convert_mac,
+            ),
+        )
+
+        def __init__(
+            self,
+            data=None,
+            offset=None,
+            parent=None,
+            length=None,
+            init=None,
+        ):
+            ovskey.ovs_key_proto.__init__(
+                self,
+                "eth",
+                data=data,
+                offset=offset,
+                parent=parent,
+                length=length,
+                init=init,
+            )
+
+    class ovs_key_ipv4(ovs_key_proto):
+        fields = (
+            ("src", "!I"),
+            ("dst", "!I"),
+            ("proto", "B"),
+            ("tos", "B"),
+            ("ttl", "B"),
+            ("frag", "B"),
+        )
+
+        fields_map = (
+            (
+                "src",
+                "src",
+                lambda x: str(ipaddress.IPv4Address(x)),
+                int,
+                convert_ipv4,
+            ),
+            (
+                "dst",
+                "dst",
+                lambda x: str(ipaddress.IPv4Address(x)),
+                int,
+                convert_ipv4,
+            ),
+            ("proto", "proto", "%d", lambda x: int(x) if x else 0,
+                convert_int(8)),
+            ("tos", "tos", "%d", lambda x: int(x) if x else 0,
+                convert_int(8)),
+            ("ttl", "ttl", "%d", lambda x: int(x) if x else 0,
+                convert_int(8)),
+            ("frag", "frag", "%d", lambda x: int(x) if x else 0,
+                convert_int(8)),
+        )
+
+        def __init__(
+            self,
+            data=None,
+            offset=None,
+            parent=None,
+            length=None,
+            init=None,
+        ):
+            ovskey.ovs_key_proto.__init__(
+                self,
+                "ipv4",
+                data=data,
+                offset=offset,
+                parent=parent,
+                length=length,
+                init=init,
+            )
+
+    class ovs_key_ipv6(ovs_key_proto):
+        fields = (
+            ("src", "!16s"),
+            ("dst", "!16s"),
+            ("label", "!I"),
+            ("proto", "B"),
+            ("tclass", "B"),
+            ("hlimit", "B"),
+            ("frag", "B"),
+        )
+
+        fields_map = (
+            (
+                "src",
+                "src",
+                lambda x: str(ipaddress.IPv6Address(x)),
+                lambda x: int.from_bytes(x, "big"),
+                lambda x: ipaddress.IPv6Address(x),
+            ),
+            (
+                "dst",
+                "dst",
+                lambda x: str(ipaddress.IPv6Address(x)),
+                lambda x: int.from_bytes(x, "big"),
+                lambda x: ipaddress.IPv6Address(x),
+            ),
+            ("label", "label", "%d", int),
+            ("proto", "proto", "%d", int),
+            ("tclass", "tclass", "%d", int),
+            ("hlimit", "hlimit", "%d", int),
+            ("frag", "frag", "%d", int),
+        )
+
+        def __init__(
+            self,
+            data=None,
+            offset=None,
+            parent=None,
+            length=None,
+            init=None,
+        ):
+            ovskey.ovs_key_proto.__init__(
+                self,
+                "ipv6",
+                data=data,
+                offset=offset,
+                parent=parent,
+                length=length,
+                init=init,
+            )
+
+    class ovs_key_tcp(ovs_key_proto):
+        def __init__(
+            self,
+            data=None,
+            offset=None,
+            parent=None,
+            length=None,
+            init=None,
+        ):
+            ovskey.ovs_key_proto.__init__(
+                self,
+                "tcp",
+                data=data,
+                offset=offset,
+                parent=parent,
+                length=length,
+                init=init,
+            )
+
+    class ovs_key_udp(ovs_key_proto):
+        def __init__(
+            self,
+            data=None,
+            offset=None,
+            parent=None,
+            length=None,
+            init=None,
+        ):
+            ovskey.ovs_key_proto.__init__(
+                self,
+                "udp",
+                data=data,
+                offset=offset,
+                parent=parent,
+                length=length,
+                init=init,
+            )
+
+    class ovs_key_sctp(ovs_key_proto):
+        def __init__(
+            self,
+            data=None,
+            offset=None,
+            parent=None,
+            length=None,
+            init=None,
+        ):
+            ovskey.ovs_key_proto.__init__(
+                self,
+                "sctp",
+                data=data,
+                offset=offset,
+                parent=parent,
+                length=length,
+                init=init,
+            )
+
+    class ovs_key_icmp(ovs_key_proto):
+        fields = (
+            ("type", "B"),
+            ("code", "B"),
+        )
+
+        fields_map = (
+            ("type", "type", "%d", lambda x: int(x) if x else 0),
+            ("code", "code", "%d", lambda x: int(x) if x else 0),
+        )
+
+        def __init__(
+            self,
+            data=None,
+            offset=None,
+            parent=None,
+            length=None,
+            init=None,
+        ):
+            ovskey.ovs_key_proto.__init__(
+                self,
+                "icmp",
+                data=data,
+                offset=offset,
+                parent=parent,
+                length=length,
+                init=init,
+            )
+
+    class ovs_key_icmpv6(ovs_key_icmp):
+        def __init__(
+            self,
+            data=None,
+            offset=None,
+            parent=None,
+            length=None,
+            init=None,
+        ):
+            ovskey.ovs_key_proto.__init__(
+                self,
+                "icmpv6",
+                data=data,
+                offset=offset,
+                parent=parent,
+                length=length,
+                init=init,
+            )
+
+    class ovs_key_arp(ovs_key_proto):
+        fields = (
+            ("sip", "!I"),
+            ("tip", "!I"),
+            ("op", "!H"),
+            ("sha", "!6s"),
+            ("tha", "!6s"),
+            ("pad", "xx"),
+        )
+
+        fields_map = (
+            (
+                "sip",
+                "sip",
+                lambda x: str(ipaddress.IPv4Address(x)),
+                int,
+                convert_ipv4,
+            ),
+            (
+                "tip",
+                "tip",
+                lambda x: str(ipaddress.IPv4Address(x)),
+                int,
+                convert_ipv4,
+            ),
+            ("op", "op", "%d", lambda x: int(x) if x else 0),
+            (
+                "sha",
+                "sha",
+                macstr,
+                lambda x: int.from_bytes(x, "big"),
+                convert_mac,
+            ),
+            (
+                "tha",
+                "tha",
+                macstr,
+                lambda x: int.from_bytes(x, "big"),
+                convert_mac,
+            ),
+        )
+
+        def __init__(
+            self,
+            data=None,
+            offset=None,
+            parent=None,
+            length=None,
+            init=None,
+        ):
+            ovskey.ovs_key_proto.__init__(
+                self,
+                "arp",
+                data=data,
+                offset=offset,
+                parent=parent,
+                length=length,
+                init=init,
+            )
+
+    class ovs_key_nd(ovs_key_proto):
+        fields = (
+            ("target", "!16s"),
+            ("sll", "!6s"),
+            ("tll", "!6s"),
+        )
+
+        fields_map = (
+            (
+                "target",
+                "target",
+                lambda x: str(ipaddress.IPv6Address(x)),
+                lambda x: int.from_bytes(x, "big"),
+            ),
+            ("sll", "sll", macstr, lambda x: int.from_bytes(x, "big")),
+            ("tll", "tll", macstr, lambda x: int.from_bytes(x, "big")),
+        )
+
+        def __init__(
+            self,
+            data=None,
+            offset=None,
+            parent=None,
+            length=None,
+            init=None,
+        ):
+            ovskey.ovs_key_proto.__init__(
+                self,
+                "nd",
+                data=data,
+                offset=offset,
+                parent=parent,
+                length=length,
+                init=init,
+            )
+
+    class ovs_key_ct_tuple_ipv4(ovs_key_proto):
+        fields = (
+            ("src", "!I"),
+            ("dst", "!I"),
+            ("tp_src", "!H"),
+            ("tp_dst", "!H"),
+            ("proto", "B"),
+        )
+
+        fields_map = (
+            (
+                "src",
+                "src",
+                lambda x: str(ipaddress.IPv4Address(x)),
+                int,
+                convert_ipv4,
+            ),
+            (
+                "dst",
+                "dst",
+                lambda x: str(ipaddress.IPv4Address(x)),
+                int,
+                convert_ipv4,
+            ),
+            ("tp_src", "tp_src", "%d", int),
+            ("tp_dst", "tp_dst", "%d", int),
+            ("proto", "proto", "%d", int),
+        )
+
+        def __init__(
+            self,
+            data=None,
+            offset=None,
+            parent=None,
+            length=None,
+            init=None,
+        ):
+            ovskey.ovs_key_proto.__init__(
+                self,
+                "ct_tuple4",
+                data=data,
+                offset=offset,
+                parent=parent,
+                length=length,
+                init=init,
+            )
+
+    class ovs_key_ct_tuple_ipv6(nla):
+        fields = (
+            ("src", "!16s"),
+            ("dst", "!16s"),
+            ("tp_src", "!H"),
+            ("tp_dst", "!H"),
+            ("proto", "B"),
+        )
+
+        fields_map = (
+            (
+                "src",
+                "src",
+                lambda x: str(ipaddress.IPv6Address(x)),
+                lambda x: int.from_bytes(x, "big", convertmac),
+            ),
+            (
+                "dst",
+                "dst",
+                lambda x: str(ipaddress.IPv6Address(x)),
+                lambda x: int.from_bytes(x, "big"),
+            ),
+            ("tp_src", "tp_src", "%d", int),
+            ("tp_dst", "tp_dst", "%d", int),
+            ("proto", "proto", "%d", int),
+        )
+
+        def __init__(
+            self,
+            data=None,
+            offset=None,
+            parent=None,
+            length=None,
+            init=None,
+        ):
+            ovskey.ovs_key_proto.__init__(
+                self,
+                "ct_tuple6",
+                data=data,
+                offset=offset,
+                parent=parent,
+                length=length,
+                init=init,
+            )
+
+    class ovs_key_mpls(nla):
+        fields = (("lse", ">I"),)
+
+    def parse(self, flowstr, mask=None):
+        for field in (
+            ("OVS_KEY_ATTR_PRIORITY", "skb_priority", intparse),
+            ("OVS_KEY_ATTR_SKB_MARK", "skb_mark", intparse),
+            ("OVS_KEY_ATTR_RECIRC_ID", "recirc_id", intparse),
+            ("OVS_KEY_ATTR_DP_HASH", "dp_hash", intparse),
+            ("OVS_KEY_ATTR_CT_STATE", "ct_state", parse_ct_state),
+            ("OVS_KEY_ATTR_CT_ZONE", "ct_zone", intparse),
+            ("OVS_KEY_ATTR_CT_MARK", "ct_mark", intparse),
+            ("OVS_KEY_ATTR_IN_PORT", "in_port", intparse),
+            (
+                "OVS_KEY_ATTR_ETHERNET",
+                "eth",
+                ovskey.ethaddr,
+            ),
+            (
+                "OVS_KEY_ATTR_ETHERTYPE",
+                "eth_type",
+                lambda x: intparse(x, "0xffff"),
+            ),
+            (
+                "OVS_KEY_ATTR_IPV4",
+                "ipv4",
+                ovskey.ovs_key_ipv4,
+            ),
+            (
+                "OVS_KEY_ATTR_IPV6",
+                "ipv6",
+                ovskey.ovs_key_ipv6,
+            ),
+            (
+                "OVS_KEY_ATTR_ARP",
+                "arp",
+                ovskey.ovs_key_arp,
+            ),
+            (
+                "OVS_KEY_ATTR_TCP",
+                "tcp",
+                ovskey.ovs_key_tcp,
+            ),
+            (
+                "OVS_KEY_ATTR_UDP",
+                "udp",
+                ovskey.ovs_key_udp,
+            ),
+            (
+                "OVS_KEY_ATTR_ICMP",
+                "icmp",
+                ovskey.ovs_key_icmp,
+            ),
+            (
+                "OVS_KEY_ATTR_TCP_FLAGS",
+                "tcp_flags",
+                lambda x: parse_flags(x, None),
+            ),
+        ):
+            fld = field[1] + "("
+            if not flowstr.startswith(fld):
+                continue
+
+            if not isinstance(field[2], types.FunctionType):
+                nk = field[2]()
+                flowstr, k, m = nk.parse(flowstr, field[2])
+            else:
+                flowstr = flowstr[len(fld) :]
+                flowstr, k, m = field[2](flowstr)
+
+            if m and mask is not None:
+                mask["attrs"].append([field[0], m])
+            self["attrs"].append([field[0], k])
+
+            flowstr = flowstr[strspn(flowstr, "),") :]
+
+        return flowstr
+
+    def dpstr(self, mask=None, more=False):
+        print_str = ""
+
+        for field in (
+            (
+                "OVS_KEY_ATTR_PRIORITY",
+                "skb_priority",
+                "%d",
+                lambda x: False,
+                True,
+            ),
+            (
+                "OVS_KEY_ATTR_SKB_MARK",
+                "skb_mark",
+                "%d",
+                lambda x: False,
+                True,
+            ),
+            (
+                "OVS_KEY_ATTR_RECIRC_ID",
+                "recirc_id",
+                "0x%08X",
+                lambda x: False,
+                True,
+            ),
+            (
+                "OVS_KEY_ATTR_DP_HASH",
+                "dp_hash",
+                "0x%08X",
+                lambda x: False,
+                True,
+            ),
+            (
+                "OVS_KEY_ATTR_CT_STATE",
+                "ct_state",
+                "0x%04x",
+                lambda x: False,
+                True,
+            ),
+            (
+                "OVS_KEY_ATTR_CT_ZONE",
+                "ct_zone",
+                "0x%04x",
+                lambda x: False,
+                True,
+            ),
+            (
+                "OVS_KEY_ATTR_CT_MARK",
+                "ct_mark",
+                "0x%08x",
+                lambda x: False,
+                True,
+            ),
+            (
+                "OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4",
+                None,
+                None,
+                False,
+                False,
+            ),
+            (
+                "OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6",
+                None,
+                None,
+                False,
+                False,
+            ),
+            (
+                "OVS_KEY_ATTR_IN_PORT",
+                "in_port",
+                "%d",
+                lambda x: True,
+                True,
+            ),
+            ("OVS_KEY_ATTR_ETHERNET", None, None, False, False),
+            (
+                "OVS_KEY_ATTR_ETHERTYPE",
+                "eth_type",
+                "0x%04x",
+                lambda x: int(x) == 0xFFFF,
+                True,
+            ),
+            ("OVS_KEY_ATTR_IPV4", None, None, False, False),
+            ("OVS_KEY_ATTR_IPV6", None, None, False, False),
+            ("OVS_KEY_ATTR_ARP", None, None, False, False),
+            ("OVS_KEY_ATTR_TCP", None, None, False, False),
+            (
+                "OVS_KEY_ATTR_TCP_FLAGS",
+                "tcp_flags",
+                "0x%04x",
+                lambda x: False,
+                True,
+            ),
+            ("OVS_KEY_ATTR_UDP", None, None, False, False),
+            ("OVS_KEY_ATTR_SCTP", None, None, False, False),
+            ("OVS_KEY_ATTR_ICMP", None, None, False, False),
+            ("OVS_KEY_ATTR_ICMPV6", None, None, False, False),
+            ("OVS_KEY_ATTR_ND", None, None, False, False),
+        ):
+            v = self.get_attr(field[0])
+            if v is not None:
+                m = None if mask is None else mask.get_attr(field[0])
+                if field[4] is False:
+                    print_str += v.dpstr(m, more)
+                    print_str += ","
+                else:
+                    if m is None or field[3](m):
+                        print_str += field[1] + "("
+                        print_str += field[2] % v
+                        print_str += "),"
+                    elif more or m != 0:
+                        print_str += field[1] + "("
+                        print_str += (field[2] % v) + "/" + (field[2] % m)
+                        print_str += "),"
+
+        return print_str
+
+
+class OvsPacket(GenericNetlinkSocket):
+    OVS_PACKET_CMD_MISS = 1  # Flow table miss
+    OVS_PACKET_CMD_ACTION = 2  # USERSPACE action
+    OVS_PACKET_CMD_EXECUTE = 3  # Apply actions to packet
+
+    class ovs_packet_msg(ovs_dp_msg):
+        nla_map = (
+            ("OVS_PACKET_ATTR_UNSPEC", "none"),
+            ("OVS_PACKET_ATTR_PACKET", "array(uint8)"),
+            ("OVS_PACKET_ATTR_KEY", "ovskey"),
+            ("OVS_PACKET_ATTR_ACTIONS", "ovsactions"),
+            ("OVS_PACKET_ATTR_USERDATA", "none"),
+            ("OVS_PACKET_ATTR_EGRESS_TUN_KEY", "none"),
+            ("OVS_PACKET_ATTR_UNUSED1", "none"),
+            ("OVS_PACKET_ATTR_UNUSED2", "none"),
+            ("OVS_PACKET_ATTR_PROBE", "none"),
+            ("OVS_PACKET_ATTR_MRU", "uint16"),
+            ("OVS_PACKET_ATTR_LEN", "uint32"),
+            ("OVS_PACKET_ATTR_HASH", "uint64"),
+        )
+
+    def __init__(self):
+        GenericNetlinkSocket.__init__(self)
+        self.bind(OVS_PACKET_FAMILY, OvsPacket.ovs_packet_msg)
+
+    def upcall_handler(self, up=None):
+        print("listening on upcall packet handler:", self.epid)
+        while True:
+            try:
+                msgs = self.get()
+                for msg in msgs:
+                    if not up:
+                        continue
+                    if msg["cmd"] == OvsPacket.OVS_PACKET_CMD_MISS:
+                        up.miss(msg)
+                    elif msg["cmd"] == OvsPacket.OVS_PACKET_CMD_ACTION:
+                        up.action(msg)
+                    elif msg["cmd"] == OvsPacket.OVS_PACKET_CMD_EXECUTE:
+                        up.execute(msg)
+                    else:
+                        print("Unkonwn cmd: %d" % msg["cmd"])
+            except NetlinkError as ne:
+                raise ne
+
+
+class OvsDatapath(GenericNetlinkSocket):
+    OVS_DP_F_VPORT_PIDS = 1 << 1
+    OVS_DP_F_DISPATCH_UPCALL_PER_CPU = 1 << 3
+
+    class dp_cmd_msg(ovs_dp_msg):
+        """
+        Message class that will be used to communicate with the kernel module.
+        """
+
+        nla_map = (
+            ("OVS_DP_ATTR_UNSPEC", "none"),
+            ("OVS_DP_ATTR_NAME", "asciiz"),
+            ("OVS_DP_ATTR_UPCALL_PID", "array(uint32)"),
+            ("OVS_DP_ATTR_STATS", "dpstats"),
+            ("OVS_DP_ATTR_MEGAFLOW_STATS", "megaflowstats"),
+            ("OVS_DP_ATTR_USER_FEATURES", "uint32"),
+            ("OVS_DP_ATTR_PAD", "none"),
+            ("OVS_DP_ATTR_MASKS_CACHE_SIZE", "uint32"),
+            ("OVS_DP_ATTR_PER_CPU_PIDS", "array(uint32)"),
+        )
+
+        class dpstats(nla):
+            fields = (
+                ("hit", "=Q"),
+                ("missed", "=Q"),
+                ("lost", "=Q"),
+                ("flows", "=Q"),
+            )
+
+        class megaflowstats(nla):
+            fields = (
+                ("mask_hit", "=Q"),
+                ("masks", "=I"),
+                ("padding", "=I"),
+                ("cache_hits", "=Q"),
+                ("pad1", "=Q"),
+            )
+
+    def __init__(self):
+        GenericNetlinkSocket.__init__(self)
+        self.bind(OVS_DATAPATH_FAMILY, OvsDatapath.dp_cmd_msg)
+
+    def info(self, dpname, ifindex=0):
+        msg = OvsDatapath.dp_cmd_msg()
+        msg["cmd"] = OVS_DP_CMD_GET
+        msg["version"] = OVS_DATAPATH_VERSION
+        msg["reserved"] = 0
+        msg["dpifindex"] = ifindex
+        msg["attrs"].append(["OVS_DP_ATTR_NAME", dpname])
+
+        try:
+            reply = self.nlm_request(
+                msg, msg_type=self.prid, msg_flags=NLM_F_REQUEST
+            )
+            reply = reply[0]
+        except NetlinkError as ne:
+            if ne.code == errno.ENODEV:
+                reply = None
+            else:
+                raise ne
+
+        return reply
+
+    def create(
+        self, dpname, shouldUpcall=False, versionStr=None, p=OvsPacket()
+    ):
+        msg = OvsDatapath.dp_cmd_msg()
+        msg["cmd"] = OVS_DP_CMD_NEW
+        if versionStr is None:
+            msg["version"] = OVS_DATAPATH_VERSION
+        else:
+            msg["version"] = int(versionStr.split(":")[0], 0)
+        msg["reserved"] = 0
+        msg["dpifindex"] = 0
+        msg["attrs"].append(["OVS_DP_ATTR_NAME", dpname])
+
+        dpfeatures = 0
+        if versionStr is not None and versionStr.find(":") != -1:
+            dpfeatures = int(versionStr.split(":")[1], 0)
+        else:
+            if versionStr is None or versionStr.find(":") == -1:
+                dpfeatures |= OvsDatapath.OVS_DP_F_DISPATCH_UPCALL_PER_CPU
+                dpfeatures &= ~OvsDatapath.OVS_DP_F_VPORT_PIDS
+
+            nproc = multiprocessing.cpu_count()
+            procarray = []
+            for i in range(1, nproc):
+                procarray += [int(p.epid)]
+            msg["attrs"].append(["OVS_DP_ATTR_UPCALL_PID", procarray])
+        msg["attrs"].append(["OVS_DP_ATTR_USER_FEATURES", dpfeatures])
+        if not shouldUpcall:
+            msg["attrs"].append(["OVS_DP_ATTR_UPCALL_PID", [0]])
+
+        try:
+            reply = self.nlm_request(
+                msg, msg_type=self.prid, msg_flags=NLM_F_REQUEST | NLM_F_ACK
+            )
+            reply = reply[0]
+        except NetlinkError as ne:
+            if ne.code == errno.EEXIST:
+                reply = None
+            else:
+                raise ne
+
+        return reply
+
+    def destroy(self, dpname):
+        msg = OvsDatapath.dp_cmd_msg()
+        msg["cmd"] = OVS_DP_CMD_DEL
+        msg["version"] = OVS_DATAPATH_VERSION
+        msg["reserved"] = 0
+        msg["dpifindex"] = 0
+        msg["attrs"].append(["OVS_DP_ATTR_NAME", dpname])
+
+        try:
+            reply = self.nlm_request(
+                msg, msg_type=self.prid, msg_flags=NLM_F_REQUEST | NLM_F_ACK
+            )
+            reply = reply[0]
+        except NetlinkError as ne:
+            if ne.code == errno.ENODEV:
+                reply = None
+            else:
+                raise ne
+
+        return reply
+
+
+class OvsVport(GenericNetlinkSocket):
+    OVS_VPORT_TYPE_NETDEV = 1
+    OVS_VPORT_TYPE_INTERNAL = 2
+    OVS_VPORT_TYPE_GRE = 3
+    OVS_VPORT_TYPE_VXLAN = 4
+    OVS_VPORT_TYPE_GENEVE = 5
+
+    class ovs_vport_msg(ovs_dp_msg):
+        nla_map = (
+            ("OVS_VPORT_ATTR_UNSPEC", "none"),
+            ("OVS_VPORT_ATTR_PORT_NO", "uint32"),
+            ("OVS_VPORT_ATTR_TYPE", "uint32"),
+            ("OVS_VPORT_ATTR_NAME", "asciiz"),
+            ("OVS_VPORT_ATTR_OPTIONS", "none"),
+            ("OVS_VPORT_ATTR_UPCALL_PID", "array(uint32)"),
+            ("OVS_VPORT_ATTR_STATS", "vportstats"),
+            ("OVS_VPORT_ATTR_PAD", "none"),
+            ("OVS_VPORT_ATTR_IFINDEX", "uint32"),
+            ("OVS_VPORT_ATTR_NETNSID", "uint32"),
+        )
+
+        class vportstats(nla):
+            fields = (
+                ("rx_packets", "=Q"),
+                ("tx_packets", "=Q"),
+                ("rx_bytes", "=Q"),
+                ("tx_bytes", "=Q"),
+                ("rx_errors", "=Q"),
+                ("tx_errors", "=Q"),
+                ("rx_dropped", "=Q"),
+                ("tx_dropped", "=Q"),
+            )
+
+    def type_to_str(vport_type):
+        if vport_type == OvsVport.OVS_VPORT_TYPE_NETDEV:
+            return "netdev"
+        elif vport_type == OvsVport.OVS_VPORT_TYPE_INTERNAL:
+            return "internal"
+        elif vport_type == OvsVport.OVS_VPORT_TYPE_GRE:
+            return "gre"
+        elif vport_type == OvsVport.OVS_VPORT_TYPE_VXLAN:
+            return "vxlan"
+        elif vport_type == OvsVport.OVS_VPORT_TYPE_GENEVE:
+            return "geneve"
+        raise ValueError("Unknown vport type:%d" % vport_type)
+
+    def str_to_type(vport_type):
+        if vport_type == "netdev":
+            return OvsVport.OVS_VPORT_TYPE_NETDEV
+        elif vport_type == "internal":
+            return OvsVport.OVS_VPORT_TYPE_INTERNAL
+        elif vport_type == "gre":
+            return OvsVport.OVS_VPORT_TYPE_INTERNAL
+        elif vport_type == "vxlan":
+            return OvsVport.OVS_VPORT_TYPE_VXLAN
+        elif vport_type == "geneve":
+            return OvsVport.OVS_VPORT_TYPE_GENEVE
+        raise ValueError("Unknown vport type: '%s'" % vport_type)
+
+    def __init__(self, packet=OvsPacket()):
+        GenericNetlinkSocket.__init__(self)
+        self.bind(OVS_VPORT_FAMILY, OvsVport.ovs_vport_msg)
+        self.upcall_packet = packet
+
+    def info(self, vport_name, dpifindex=0, portno=None):
+        msg = OvsVport.ovs_vport_msg()
+
+        msg["cmd"] = OVS_VPORT_CMD_GET
+        msg["version"] = OVS_DATAPATH_VERSION
+        msg["reserved"] = 0
+        msg["dpifindex"] = dpifindex
+
+        if portno is None:
+            msg["attrs"].append(["OVS_VPORT_ATTR_NAME", vport_name])
+        else:
+            msg["attrs"].append(["OVS_VPORT_ATTR_PORT_NO", portno])
+
+        try:
+            reply = self.nlm_request(
+                msg, msg_type=self.prid, msg_flags=NLM_F_REQUEST
+            )
+            reply = reply[0]
+        except NetlinkError as ne:
+            if ne.code == errno.ENODEV:
+                reply = None
+            else:
+                raise ne
+        return reply
+
+    def attach(self, dpindex, vport_ifname, ptype):
+        msg = OvsVport.ovs_vport_msg()
+
+        msg["cmd"] = OVS_VPORT_CMD_NEW
+        msg["version"] = OVS_DATAPATH_VERSION
+        msg["reserved"] = 0
+        msg["dpifindex"] = dpindex
+        port_type = OvsVport.str_to_type(ptype)
+
+        msg["attrs"].append(["OVS_VPORT_ATTR_TYPE", port_type])
+        msg["attrs"].append(["OVS_VPORT_ATTR_NAME", vport_ifname])
+        msg["attrs"].append(
+            ["OVS_VPORT_ATTR_UPCALL_PID", [self.upcall_packet.epid]]
+        )
+
+        try:
+            reply = self.nlm_request(
+                msg, msg_type=self.prid, msg_flags=NLM_F_REQUEST | NLM_F_ACK
+            )
+            reply = reply[0]
+        except NetlinkError as ne:
+            if ne.code == errno.EEXIST:
+                reply = None
+            else:
+                raise ne
+        return reply
+
+    def reset_upcall(self, dpindex, vport_ifname, p=None):
+        msg = OvsVport.ovs_vport_msg()
+
+        msg["cmd"] = OVS_VPORT_CMD_SET
+        msg["version"] = OVS_DATAPATH_VERSION
+        msg["reserved"] = 0
+        msg["dpifindex"] = dpindex
+        msg["attrs"].append(["OVS_VPORT_ATTR_NAME", vport_ifname])
+
+        if p == None:
+            p = self.upcall_packet
+        else:
+            self.upcall_packet = p
+
+        msg["attrs"].append(["OVS_VPORT_ATTR_UPCALL_PID", [p.epid]])
+
+        try:
+            reply = self.nlm_request(
+                msg, msg_type=self.prid, msg_flags=NLM_F_REQUEST | NLM_F_ACK
+            )
+            reply = reply[0]
+        except NetlinkError as ne:
+            raise ne
+        return reply
+
+    def detach(self, dpindex, vport_ifname):
+        msg = OvsVport.ovs_vport_msg()
+
+        msg["cmd"] = OVS_VPORT_CMD_DEL
+        msg["version"] = OVS_DATAPATH_VERSION
+        msg["reserved"] = 0
+        msg["dpifindex"] = dpindex
+        msg["attrs"].append(["OVS_VPORT_ATTR_NAME", vport_ifname])
+
+        try:
+            reply = self.nlm_request(
+                msg, msg_type=self.prid, msg_flags=NLM_F_REQUEST | NLM_F_ACK
+            )
+            reply = reply[0]
+        except NetlinkError as ne:
+            if ne.code == errno.ENODEV:
+                reply = None
+            else:
+                raise ne
+        return reply
+
+    def upcall_handler(self, handler=None):
+        self.upcall_packet.upcall_handler(handler)
+
+
+class OvsFlow(GenericNetlinkSocket):
+    class ovs_flow_msg(ovs_dp_msg):
+        nla_map = (
+            ("OVS_FLOW_ATTR_UNSPEC", "none"),
+            ("OVS_FLOW_ATTR_KEY", "ovskey"),
+            ("OVS_FLOW_ATTR_ACTIONS", "ovsactions"),
+            ("OVS_FLOW_ATTR_STATS", "flowstats"),
+            ("OVS_FLOW_ATTR_TCP_FLAGS", "uint8"),
+            ("OVS_FLOW_ATTR_USED", "uint64"),
+            ("OVS_FLOW_ATTR_CLEAR", "none"),
+            ("OVS_FLOW_ATTR_MASK", "ovskey"),
+            ("OVS_FLOW_ATTR_PROBE", "none"),
+            ("OVS_FLOW_ATTR_UFID", "array(uint32)"),
+            ("OVS_FLOW_ATTR_UFID_FLAGS", "uint32"),
+        )
+
+        class flowstats(nla):
+            fields = (
+                ("packets", "=Q"),
+                ("bytes", "=Q"),
+            )
+
+        def dpstr(self, more=False):
+            ufid = self.get_attr("OVS_FLOW_ATTR_UFID")
+            ufid_str = ""
+            if ufid is not None:
+                ufid_str = (
+                    "ufid:{:08x}-{:04x}-{:04x}-{:04x}-{:04x}{:08x}".format(
+                        ufid[0],
+                        ufid[1] >> 16,
+                        ufid[1] & 0xFFFF,
+                        ufid[2] >> 16,
+                        ufid[2] & 0,
+                        ufid[3],
+                    )
+                )
+
+            key_field = self.get_attr("OVS_FLOW_ATTR_KEY")
+            keymsg = None
+            if key_field is not None:
+                keymsg = key_field
+
+            mask_field = self.get_attr("OVS_FLOW_ATTR_MASK")
+            maskmsg = None
+            if mask_field is not None:
+                maskmsg = mask_field
+
+            acts_field = self.get_attr("OVS_FLOW_ATTR_ACTIONS")
+            actsmsg = None
+            if acts_field is not None:
+                actsmsg = acts_field
+
+            print_str = ""
+
+            if more:
+                print_str += ufid_str + ","
+
+            if keymsg is not None:
+                print_str += keymsg.dpstr(maskmsg, more)
+
+            stats = self.get_attr("OVS_FLOW_ATTR_STATS")
+            if stats is None:
+                print_str += " packets:0, bytes:0,"
+            else:
+                print_str += " packets:%d, bytes:%d," % (
+                    stats["packets"],
+                    stats["bytes"],
+                )
+
+            used = self.get_attr("OVS_FLOW_ATTR_USED")
+            print_str += " used:"
+            if used is None:
+                print_str += "never,"
+            else:
+                used_time = int(used)
+                cur_time_sec = time.clock_gettime(time.CLOCK_MONOTONIC)
+                used_time = (cur_time_sec * 1000) - used_time
+                print_str += "{}s,".format(used_time / 1000)
+
+            print_str += " actions:"
+            if (
+                actsmsg is None
+                or "attrs" not in actsmsg
+                or len(actsmsg["attrs"]) == 0
+            ):
+                print_str += "drop"
+            else:
+                print_str += actsmsg.dpstr(more)
+
+            return print_str
+
+        def parse(self, flowstr, actstr, dpidx=0):
+            OVS_UFID_F_OMIT_KEY = 1 << 0
+            OVS_UFID_F_OMIT_MASK = 1 << 1
+            OVS_UFID_F_OMIT_ACTIONS = 1 << 2
+
+            self["cmd"] = 0
+            self["version"] = 0
+            self["reserved"] = 0
+            self["dpifindex"] = 0
+
+            if flowstr.startswith("ufid:"):
+                count = 5
+                while flowstr[count] != ",":
+                    count += 1
+                ufidstr = flowstr[5:count]
+                flowstr = flowstr[count + 1 :]
+            else:
+                ufidstr = str(uuid.uuid4())
+            uuidRawObj = uuid.UUID(ufidstr).fields
+
+            self["attrs"].append(
+                [
+                    "OVS_FLOW_ATTR_UFID",
+                    [
+                        uuidRawObj[0],
+                        uuidRawObj[1] << 16 | uuidRawObj[2],
+                        uuidRawObj[3] << 24
+                        | uuidRawObj[4] << 16
+                        | uuidRawObj[5] & (0xFF << 32) >> 32,
+                        uuidRawObj[5] & (0xFFFFFFFF),
+                    ],
+                ]
+            )
+            self["attrs"].append(
+                [
+                    "OVS_FLOW_ATTR_UFID_FLAGS",
+                    int(
+                        OVS_UFID_F_OMIT_KEY
+                        | OVS_UFID_F_OMIT_MASK
+                        | OVS_UFID_F_OMIT_ACTIONS
+                    ),
+                ]
+            )
+
+            k = ovskey()
+            m = ovskey()
+            k.parse(flowstr, m)
+            self["attrs"].append(["OVS_FLOW_ATTR_KEY", k])
+            self["attrs"].append(["OVS_FLOW_ATTR_MASK", m])
+
+            a = ovsactions()
+            a.parse(actstr)
+            self["attrs"].append(["OVS_FLOW_ATTR_ACTIONS", a])
+
+    def __init__(self):
+        GenericNetlinkSocket.__init__(self)
+
+        self.bind(OVS_FLOW_FAMILY, OvsFlow.ovs_flow_msg)
+
+    def add_flow(self, dpifindex, flowmsg):
+        """
+        Send a new flow message to the kernel.
+
+        dpifindex should be a valid datapath obtained by calling
+        into the OvsDatapath lookup
+
+        flowmsg is a flow object obtained by calling a dpparse
+        """
+
+        flowmsg["cmd"] = OVS_FLOW_CMD_NEW
+        flowmsg["version"] = OVS_DATAPATH_VERSION
+        flowmsg["reserved"] = 0
+        flowmsg["dpifindex"] = dpifindex
+
+        try:
+            reply = self.nlm_request(
+                flowmsg,
+                msg_type=self.prid,
+                msg_flags=NLM_F_REQUEST | NLM_F_ACK,
+            )
+            reply = reply[0]
+        except NetlinkError as ne:
+            print(flowmsg)
+            raise ne
+        return reply
+
+    def del_flows(self, dpifindex):
+        """
+        Send a del message to the kernel that will drop all flows.
+
+        dpifindex should be a valid datapath obtained by calling
+        into the OvsDatapath lookup
+        """
+
+        flowmsg = OvsFlow.ovs_flow_msg()
+        flowmsg["cmd"] = OVS_FLOW_CMD_DEL
+        flowmsg["version"] = OVS_DATAPATH_VERSION
+        flowmsg["reserved"] = 0
+        flowmsg["dpifindex"] = dpifindex
+
+        try:
+            reply = self.nlm_request(
+                flowmsg,
+                msg_type=self.prid,
+                msg_flags=NLM_F_REQUEST | NLM_F_ACK,
+            )
+            reply = reply[0]
+        except NetlinkError as ne:
+            print(flowmsg)
+            raise ne
+        return reply
+
+    def dump(self, dpifindex, flowspec=None):
+        """
+        Returns a list of messages containing flows.
+
+        dpifindex should be a valid datapath obtained by calling
+        into the OvsDatapath lookup
+
+        flowpsec is a string which represents a flow in the dpctl
+        format.
+        """
+        msg = OvsFlow.ovs_flow_msg()
+
+        msg["cmd"] = OVS_FLOW_CMD_GET
+        msg["version"] = OVS_DATAPATH_VERSION
+        msg["reserved"] = 0
+        msg["dpifindex"] = dpifindex
+
+        msg_flags = NLM_F_REQUEST | NLM_F_ACK
+        if flowspec is None:
+            msg_flags |= NLM_F_DUMP
+        rep = None
+
+        try:
+            rep = self.nlm_request(
+                msg,
+                msg_type=self.prid,
+                msg_flags=msg_flags,
+            )
+        except NetlinkError as ne:
+            raise ne
+        return rep
+
+    def miss(self, packetmsg):
+        seq = packetmsg["header"]["sequence_number"]
+        keystr = "(none)"
+        key_field = packetmsg.get_attr("OVS_PACKET_ATTR_KEY")
+        if key_field is not None:
+            keystr = key_field.dpstr(None, True)
+
+        pktdata = packetmsg.get_attr("OVS_PACKET_ATTR_PACKET")
+        pktpres = "yes" if pktdata is not None else "no"
+
+        print("MISS upcall[%d/%s]: %s" % (seq, pktpres, keystr), flush=True)
+
+    def execute(self, packetmsg):
+        print("userspace execute command")
+
+    def action(self, packetmsg):
+        print("userspace action command")
+
+
+def print_ovsdp_full(dp_lookup_rep, ifindex, ndb=NDB(), vpl=OvsVport()):
+    dp_name = dp_lookup_rep.get_attr("OVS_DP_ATTR_NAME")
+    base_stats = dp_lookup_rep.get_attr("OVS_DP_ATTR_STATS")
+    megaflow_stats = dp_lookup_rep.get_attr("OVS_DP_ATTR_MEGAFLOW_STATS")
+    user_features = dp_lookup_rep.get_attr("OVS_DP_ATTR_USER_FEATURES")
+    masks_cache_size = dp_lookup_rep.get_attr("OVS_DP_ATTR_MASKS_CACHE_SIZE")
+
+    print("%s:" % dp_name)
+    print(
+        "  lookups: hit:%d missed:%d lost:%d"
+        % (base_stats["hit"], base_stats["missed"], base_stats["lost"])
+    )
+    print("  flows:%d" % base_stats["flows"])
+    pkts = base_stats["hit"] + base_stats["missed"]
+    avg = (megaflow_stats["mask_hit"] / pkts) if pkts != 0 else 0.0
+    print(
+        "  masks: hit:%d total:%d hit/pkt:%f"
+        % (megaflow_stats["mask_hit"], megaflow_stats["masks"], avg)
+    )
+    print("  caches:")
+    print("    masks-cache: size:%d" % masks_cache_size)
+
+    if user_features is not None:
+        print("  features: 0x%X" % user_features)
+
+    # port print out
+    for iface in ndb.interfaces:
+        rep = vpl.info(iface.ifname, ifindex)
+        if rep is not None:
+            print(
+                "  port %d: %s (%s)"
+                % (
+                    rep.get_attr("OVS_VPORT_ATTR_PORT_NO"),
+                    rep.get_attr("OVS_VPORT_ATTR_NAME"),
+                    OvsVport.type_to_str(rep.get_attr("OVS_VPORT_ATTR_TYPE")),
+                )
+            )
+
+
+def main(argv):
+    nlmsg_atoms.ovskey = ovskey
+    nlmsg_atoms.ovsactions = ovsactions
+
+    # version check for pyroute2
+    prverscheck = pyroute2.__version__.split(".")
+    if int(prverscheck[0]) == 0 and int(prverscheck[1]) < 6:
+        print("Need to upgrade the python pyroute2 package to >= 0.6.")
+        sys.exit(0)
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "-v",
+        "--verbose",
+        action="count",
+        help="Increment 'verbose' output counter.",
+        default=0,
+    )
+    subparsers = parser.add_subparsers()
+
+    showdpcmd = subparsers.add_parser("show")
+    showdpcmd.add_argument(
+        "showdp", metavar="N", type=str, nargs="?", help="Datapath Name"
+    )
+
+    adddpcmd = subparsers.add_parser("add-dp")
+    adddpcmd.add_argument("adddp", help="Datapath Name")
+    adddpcmd.add_argument(
+        "-u",
+        "--upcall",
+        action="store_true",
+        help="Leave open a reader for upcalls",
+    )
+    adddpcmd.add_argument(
+        "-V",
+        "--versioning",
+        required=False,
+        help="Specify a custom version / feature string",
+    )
+
+    deldpcmd = subparsers.add_parser("del-dp")
+    deldpcmd.add_argument("deldp", help="Datapath Name")
+
+    addifcmd = subparsers.add_parser("add-if")
+    addifcmd.add_argument("dpname", help="Datapath Name")
+    addifcmd.add_argument("addif", help="Interface name for adding")
+    addifcmd.add_argument(
+        "-u",
+        "--upcall",
+        action="store_true",
+        help="Leave open a reader for upcalls",
+    )
+    addifcmd.add_argument(
+        "-t",
+        "--ptype",
+        type=str,
+        default="netdev",
+        choices=["netdev", "internal"],
+        help="Interface type (default netdev)",
+    )
+    delifcmd = subparsers.add_parser("del-if")
+    delifcmd.add_argument("dpname", help="Datapath Name")
+    delifcmd.add_argument("delif", help="Interface name for adding")
+
+    dumpflcmd = subparsers.add_parser("dump-flows")
+    dumpflcmd.add_argument("dumpdp", help="Datapath Name")
+
+    addflcmd = subparsers.add_parser("add-flow")
+    addflcmd.add_argument("flbr", help="Datapath name")
+    addflcmd.add_argument("flow", help="Flow specification")
+    addflcmd.add_argument("acts", help="Flow actions")
+
+    delfscmd = subparsers.add_parser("del-flows")
+    delfscmd.add_argument("flsbr", help="Datapath name")
+
+    args = parser.parse_args()
+
+    if args.verbose > 0:
+        if args.verbose > 1:
+            logging.basicConfig(level=logging.DEBUG)
+
+    ovspk = OvsPacket()
+    ovsdp = OvsDatapath()
+    ovsvp = OvsVport(ovspk)
+    ovsflow = OvsFlow()
+    ndb = NDB()
+
+    sys.setrecursionlimit(100000)
+
+    if hasattr(args, "showdp"):
+        found = False
+        for iface in ndb.interfaces:
+            rep = None
+            if args.showdp is None:
+                rep = ovsdp.info(iface.ifname, 0)
+            elif args.showdp == iface.ifname:
+                rep = ovsdp.info(iface.ifname, 0)
+
+            if rep is not None:
+                found = True
+                print_ovsdp_full(rep, iface.index, ndb, ovsvp)
+
+        if not found:
+            msg = "No DP found"
+            if args.showdp is not None:
+                msg += ":'%s'" % args.showdp
+            print(msg)
+    elif hasattr(args, "adddp"):
+        rep = ovsdp.create(args.adddp, args.upcall, args.versioning, ovspk)
+        if rep is None:
+            print("DP '%s' already exists" % args.adddp)
+        else:
+            print("DP '%s' added" % args.adddp)
+        if args.upcall:
+            ovspk.upcall_handler(ovsflow)
+    elif hasattr(args, "deldp"):
+        ovsdp.destroy(args.deldp)
+    elif hasattr(args, "addif"):
+        rep = ovsdp.info(args.dpname, 0)
+        if rep is None:
+            print("DP '%s' not found." % args.dpname)
+            return 1
+        dpindex = rep["dpifindex"]
+        rep = ovsvp.attach(rep["dpifindex"], args.addif, args.ptype)
+        msg = "vport '%s'" % args.addif
+        if rep and rep["header"]["error"] is None:
+            msg += " added."
+        else:
+            msg += " failed to add."
+        if args.upcall:
+            if rep is None:
+                rep = ovsvp.reset_upcall(dpindex, args.addif, ovspk)
+            ovsvp.upcall_handler(ovsflow)
+    elif hasattr(args, "delif"):
+        rep = ovsdp.info(args.dpname, 0)
+        if rep is None:
+            print("DP '%s' not found." % args.dpname)
+            return 1
+        rep = ovsvp.detach(rep["dpifindex"], args.delif)
+        msg = "vport '%s'" % args.delif
+        if rep and rep["header"]["error"] is None:
+            msg += " removed."
+        else:
+            msg += " failed to remove."
+    elif hasattr(args, "dumpdp"):
+        rep = ovsdp.info(args.dumpdp, 0)
+        if rep is None:
+            print("DP '%s' not found." % args.dumpdp)
+            return 1
+        rep = ovsflow.dump(rep["dpifindex"])
+        for flow in rep:
+            print(flow.dpstr(True if args.verbose > 0 else False))
+    elif hasattr(args, "flbr"):
+        rep = ovsdp.info(args.flbr, 0)
+        if rep is None:
+            print("DP '%s' not found." % args.flbr)
+            return 1
+        flow = OvsFlow.ovs_flow_msg()
+        flow.parse(args.flow, args.acts, rep["dpifindex"])
+        ovsflow.add_flow(rep["dpifindex"], flow)
+    elif hasattr(args, "flsbr"):
+        rep = ovsdp.info(args.flsbr, 0)
+        if rep is None:
+            print("DP '%s' not found." % args.flsbr)
+        ovsflow.del_flows(rep["dpifindex"])
+
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv))
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 6bbf69a28e12..cfc84958025a 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 #
 # Check that route PMTU values match expectations, and that initial device MTU
@@ -26,6 +26,15 @@
 # - pmtu_ipv6
 #	Same as pmtu_ipv4, except for locked PMTU tests, using IPv6
 #
+# - pmtu_ipv4_dscp_icmp_exception
+#	Set up the same network topology as pmtu_ipv4, but use non-default
+#	routing table in A. A fib-rule is used to jump to this routing table
+#	based on DSCP. Send ICMPv4 packets with the expected DSCP value and
+#	verify that ECN doesn't interfere with the creation of PMTU exceptions.
+#
+# - pmtu_ipv4_dscp_udp_exception
+#	Same as pmtu_ipv4_dscp_icmp_exception, but use UDP instead of ICMP.
+#
 # - pmtu_ipv4_vxlan4_exception
 #	Set up the same network topology as pmtu_ipv4, create a VXLAN tunnel
 #	over IPv4 between A and B, routed via R1. On the link between R1 and B,
@@ -118,6 +127,16 @@
 #	below for IPv6 doesn't apply here, because, on IPv4, administrative MTU
 #	changes alone won't affect PMTU
 #
+# - pmtu_vti4_udp_exception
+#       Same as pmtu_vti4_exception, but using ESP-in-UDP
+#
+# - pmtu_vti4_udp_routed_exception
+#       Set up vti tunnel on top of veth connected through routing namespace and
+#	add xfrm states and policies with ESP-in-UDP encapsulation. Check that
+#	route exception is not created if link layer MTU is not exceeded, then
+#	lower MTU on second part of routed environment and check that exception
+#	is created with the expected PMTU.
+#
 # - pmtu_vti6_exception
 #	Set up vti6 tunnel on top of veth, with xfrm states and policies, in two
 #	namespaces with matching endpoints. Check that route exception is
@@ -125,6 +144,13 @@
 #	decrease and increase MTU of tunnel, checking that route exception PMTU
 #	changes accordingly
 #
+# - pmtu_vti6_udp_exception
+#       Same as pmtu_vti6_exception, but using ESP-in-UDP
+#
+# - pmtu_vti6_udp_routed_exception
+#	Same as pmtu_vti6_udp_routed_exception but with routing between vti
+#	endpoints
+#
 # - pmtu_vti4_default_mtu
 #	Set up vti4 tunnel on top of veth, in two namespaces with matching
 #	endpoints. Check that MTU assigned to vti interface is the MTU of the
@@ -162,10 +188,18 @@
 # - list_flush_ipv6_exception
 #	Using the same topology as in pmtu_ipv6, create exceptions, and check
 #	they are shown when listing exception caches, gone after flushing them
+#
+# - pmtu_ipv4_route_change
+#	Use the same topology as in pmtu_ipv4, but issue a route replacement
+#	command and delete the corresponding device afterward. This tests for
+#	proper cleanup of the PMTU exceptions by the route replacement path.
+#	Device unregistration should complete successfully
+#
+# - pmtu_ipv6_route_change
+#	Same as above but with IPv6
 
-
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
+source lib.sh
+source net_helper.sh
 
 PAUSE_ON_FAIL=no
 VERBOSE=0
@@ -178,6 +212,8 @@ which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
 tests="
 	pmtu_ipv4_exception		ipv4: PMTU exceptions			1
 	pmtu_ipv6_exception		ipv6: PMTU exceptions			1
+	pmtu_ipv4_dscp_icmp_exception	ICMPv4 with DSCP and ECN: PMTU exceptions	1
+	pmtu_ipv4_dscp_udp_exception	UDPv4 with DSCP and ECN: PMTU exceptions	1
 	pmtu_ipv4_vxlan4_exception	IPv4 over vxlan4: PMTU exceptions	1
 	pmtu_ipv6_vxlan4_exception	IPv6 over vxlan4: PMTU exceptions	1
 	pmtu_ipv4_vxlan6_exception	IPv4 over vxlan6: PMTU exceptions	1
@@ -216,6 +252,10 @@ tests="
 	pmtu_ipv6_ipv6_exception	IPv6 over IPv6: PMTU exceptions		1
 	pmtu_vti6_exception		vti6: PMTU exceptions			0
 	pmtu_vti4_exception		vti4: PMTU exceptions			0
+	pmtu_vti6_udp_exception		vti6: PMTU exceptions (ESP-in-UDP)	0
+	pmtu_vti4_udp_exception		vti4: PMTU exceptions (ESP-in-UDP)	0
+	pmtu_vti6_udp_routed_exception	vti6: PMTU exceptions, routed (ESP-in-UDP)	0
+	pmtu_vti4_udp_routed_exception	vti4: PMTU exceptions, routed (ESP-in-UDP)	0
 	pmtu_vti4_default_mtu		vti4: default MTU assignment		0
 	pmtu_vti6_default_mtu		vti6: default MTU assignment		0
 	pmtu_vti4_link_add_mtu		vti4: MTU setting on link creation	0
@@ -224,18 +264,9 @@ tests="
 	cleanup_ipv4_exception		ipv4: cleanup of cached exceptions	1
 	cleanup_ipv6_exception		ipv6: cleanup of cached exceptions	1
 	list_flush_ipv4_exception	ipv4: list and flush cached exceptions	1
-	list_flush_ipv6_exception	ipv6: list and flush cached exceptions	1"
-
-NS_A="ns-A"
-NS_B="ns-B"
-NS_C="ns-C"
-NS_R1="ns-R1"
-NS_R2="ns-R2"
-ns_a="ip netns exec ${NS_A}"
-ns_b="ip netns exec ${NS_B}"
-ns_c="ip netns exec ${NS_C}"
-ns_r1="ip netns exec ${NS_R1}"
-ns_r2="ip netns exec ${NS_R2}"
+	list_flush_ipv6_exception	ipv6: list and flush cached exceptions	1
+	pmtu_ipv4_route_change		ipv4: PMTU exception w/route replace	1
+	pmtu_ipv6_route_change		ipv6: PMTU exception w/route replace	1"
 
 # Addressing and routing for tests with routers: four network segments, with
 # index SEGMENT between 1 and 4, a common prefix (PREFIX4 or PREFIX6) and an
@@ -269,7 +300,6 @@ routes="
 	A	${prefix6}:${b_r2}::1	${prefix6}:${a_r2}::2
 	B	default			${prefix6}:${b_r1}::2
 "
-
 USE_NH="no"
 #	ns	family	nh id	   destination		gateway
 nexthops="
@@ -294,6 +324,9 @@ routes_nh="
 	B	6	default			61
 "
 
+policy_mark=0x04
+rt_table=main
+
 veth4_a_addr="192.168.1.1"
 veth4_b_addr="192.168.1.2"
 veth4_c_addr="192.168.2.10"
@@ -316,6 +349,9 @@ dummy6_mask="64"
 
 err_buf=
 tcpdump_pids=
+nettest_pids=
+socat_pids=
+tmpoutfile=
 
 err() {
 	err_buf="${err_buf}${1}
@@ -344,6 +380,16 @@ run_cmd() {
 	return $rc
 }
 
+run_cmd_bg() {
+	cmd="$*"
+
+	if [ "$VERBOSE" = "1" ]; then
+		printf "    COMMAND: %s &\n" "${cmd}"
+	fi
+
+	$cmd 2>&1 &
+}
+
 # Find the auto-generated name for this namespace
 nsname() {
 	eval echo \$NS_$1
@@ -355,7 +401,7 @@ setup_fou_or_gue() {
 	encap="${3}"
 
 	if [ "${outer}" = "4" ]; then
-		modprobe fou || return 2
+		modprobe fou || return $ksft_skip
 		a_addr="${prefix4}.${a_r1}.1"
 		b_addr="${prefix4}.${b_r1}.1"
 		if [ "${inner}" = "4" ]; then
@@ -366,7 +412,7 @@ setup_fou_or_gue() {
 			ipproto="41"
 		fi
 	else
-		modprobe fou6 || return 2
+		modprobe fou6 || return $ksft_skip
 		a_addr="${prefix6}:${a_r1}::1"
 		b_addr="${prefix6}:${b_r1}::1"
 		if [ "${inner}" = "4" ]; then
@@ -380,8 +426,8 @@ setup_fou_or_gue() {
 		fi
 	fi
 
-	run_cmd ${ns_a} ip fou add port 5555 ipproto ${ipproto} || return 2
-	run_cmd ${ns_a} ip link add ${encap}_a type ${type} ${mode} local ${a_addr} remote ${b_addr} encap ${encap} encap-sport auto encap-dport 5556 || return 2
+	run_cmd ${ns_a} ip fou add port 5555 ipproto ${ipproto} || return $ksft_skip
+	run_cmd ${ns_a} ip link add ${encap}_a type ${type} ${mode} local ${a_addr} remote ${b_addr} encap ${encap} encap-sport auto encap-dport 5556 || return $ksft_skip
 
 	run_cmd ${ns_b} ip fou add port 5556 ipproto ${ipproto}
 	run_cmd ${ns_b} ip link add ${encap}_b type ${type} ${mode} local ${b_addr} remote ${a_addr} encap ${encap} encap-sport auto encap-dport 5555
@@ -455,7 +501,7 @@ setup_ipvX_over_ipvY() {
 		fi
 	fi
 
-	run_cmd ${ns_a} ip link add ip_a type ${type} local ${a_addr} remote ${b_addr} mode ${mode} || return 2
+	run_cmd ${ns_a} ip link add ip_a type ${type} local ${a_addr} remote ${b_addr} mode ${mode} || return $ksft_skip
 	run_cmd ${ns_b} ip link add ip_b type ${type} local ${b_addr} remote ${a_addr} mode ${mode}
 
 	run_cmd ${ns_a} ip link set ip_a up
@@ -487,13 +533,17 @@ setup_ip6ip6() {
 }
 
 setup_namespaces() {
+	setup_ns NS_A NS_B NS_C NS_R1 NS_R2
 	for n in ${NS_A} ${NS_B} ${NS_C} ${NS_R1} ${NS_R2}; do
-		ip netns add ${n} || return 1
-
 		# Disable DAD, so that we don't have to wait to use the
 		# configured IPv6 addresses
 		ip netns exec ${n} sysctl -q net/ipv6/conf/default/accept_dad=0
 	done
+	ns_a="ip netns exec ${NS_A}"
+	ns_b="ip netns exec ${NS_B}"
+	ns_c="ip netns exec ${NS_C}"
+	ns_r1="ip netns exec ${NS_R1}"
+	ns_r2="ip netns exec ${NS_R2}"
 }
 
 setup_veth() {
@@ -538,6 +588,14 @@ setup_vti6() {
 	setup_vti 6 ${veth6_a_addr} ${veth6_b_addr} ${tunnel6_a_addr} ${tunnel6_b_addr} ${tunnel6_mask}
 }
 
+setup_vti4routed() {
+	setup_vti 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 ${tunnel4_a_addr} ${tunnel4_b_addr} ${tunnel4_mask}
+}
+
+setup_vti6routed() {
+	setup_vti 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 ${tunnel6_a_addr} ${tunnel6_b_addr} ${tunnel6_mask}
+}
+
 setup_vxlan_or_geneve() {
 	type="${1}"
 	a_addr="${2}"
@@ -609,18 +667,38 @@ setup_xfrm() {
 	proto=${1}
 	veth_a_addr="${2}"
 	veth_b_addr="${3}"
+	encap=${4}
 
-	run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel || return 1
-	run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
+	run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap} || return 1
+	run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap}
 	run_cmd ${ns_a} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
 	run_cmd ${ns_a} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel
 
-	run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
-	run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
+	run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap}
+	run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap}
 	run_cmd ${ns_b} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel
 	run_cmd ${ns_b} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
 }
 
+setup_nettest_xfrm() {
+	if ! which nettest >/dev/null; then
+		PATH=$PWD:$PATH
+		if ! which nettest >/dev/null; then
+			echo "'nettest' command not found; skipping tests"
+			return 1
+		fi
+	fi
+
+	[ ${1} -eq 6 ] && proto="-6" || proto=""
+	port=${2}
+
+	run_cmd_bg "${ns_a}" nettest "${proto}" -q -D -s -x -p "${port}" -t 5
+	nettest_pids="${nettest_pids} $!"
+
+	run_cmd_bg "${ns_b}" nettest "${proto}" -q -D -s -x -p "${port}" -t 5
+	nettest_pids="${nettest_pids} $!"
+}
+
 setup_xfrm4() {
 	setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr}
 }
@@ -629,6 +707,26 @@ setup_xfrm6() {
 	setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr}
 }
 
+setup_xfrm4udp() {
+	setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr} "encap espinudp 4500 4500 0.0.0.0" && \
+		setup_nettest_xfrm 4 4500
+}
+
+setup_xfrm6udp() {
+	setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} "encap espinudp 4500 4500 0.0.0.0" && \
+		setup_nettest_xfrm 6 4500
+}
+
+setup_xfrm4udprouted() {
+	setup_xfrm 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "encap espinudp 4500 4500 0.0.0.0" && \
+		setup_nettest_xfrm 4 4500
+}
+
+setup_xfrm6udprouted() {
+	setup_xfrm 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "encap espinudp 4500 4500 0.0.0.0" && \
+		setup_nettest_xfrm 6 4500
+}
+
 setup_routing_old() {
 	for i in ${routes}; do
 		[ "${ns}" = "" ]	&& ns="${i}"		&& continue
@@ -637,7 +735,7 @@ setup_routing_old() {
 
 		ns_name="$(nsname ${ns})"
 
-		ip -n ${ns_name} route add ${addr} via ${gw}
+		ip -n "${ns_name}" route add "${addr}" table "${rt_table}" via "${gw}"
 
 		ns=""; addr=""; gw=""
 	done
@@ -667,7 +765,7 @@ setup_routing_new() {
 
 		ns_name="$(nsname ${ns})"
 
-		ip -n ${ns_name} -${fam} route add ${addr} nhid ${nhid}
+		ip -n "${ns_name}" -"${fam}" route add "${addr}" table "${rt_table}" nhid "${nhid}"
 
 		ns=""; fam=""; addr=""; nhid=""
 	done
@@ -712,12 +810,30 @@ setup_routing() {
 	return 0
 }
 
+setup_policy_routing() {
+	setup_routing
+
+	ip -netns "${NS_A}" -4 rule add dsfield "${policy_mark}" \
+		table "${rt_table}"
+
+	# Set the IPv4 Don't Fragment bit with tc, since socat doesn't seem to
+	# have an option do to it.
+	tc -netns "${NS_A}" qdisc replace dev veth_A-R1 root prio
+	tc -netns "${NS_A}" qdisc replace dev veth_A-R2 root prio
+	tc -netns "${NS_A}" filter add dev veth_A-R1                      \
+		protocol ipv4 flower ip_proto udp                         \
+		action pedit ex munge ip df set 0x40 pipe csum ip and udp
+	tc -netns "${NS_A}" filter add dev veth_A-R2                      \
+		protocol ipv4 flower ip_proto udp                         \
+		action pedit ex munge ip df set 0x40 pipe csum ip and udp
+}
+
 setup_bridge() {
-	run_cmd ${ns_a} ip link add br0 type bridge || return 2
+	run_cmd ${ns_a} ip link add br0 type bridge || return $ksft_skip
 	run_cmd ${ns_a} ip link set br0 up
 
 	run_cmd ${ns_c} ip link add veth_C-A type veth peer name veth_A-C
-	run_cmd ${ns_c} ip link set veth_A-C netns ns-A
+	run_cmd ${ns_c} ip link set veth_A-C netns ${NS_A}
 
 	run_cmd ${ns_a} ip link set veth_A-C up
 	run_cmd ${ns_c} ip link set veth_C-A up
@@ -765,7 +881,7 @@ setup_ovs_vxlan6() {
 }
 
 setup_ovs_bridge() {
-	run_cmd ovs-vsctl add-br ovs_br0 || return 2
+	run_cmd ovs-vsctl add-br ovs_br0 || return $ksft_skip
 	run_cmd ip link set ovs_br0 up
 
 	run_cmd ${ns_c} ip link add veth_C-A type veth peer name veth_A-C
@@ -789,7 +905,6 @@ setup_ovs_bridge() {
 setup() {
 	[ "$(id -u)" -ne 0 ] && echo "  need to run as root" && return $ksft_skip
 
-	cleanup
 	for arg do
 		eval setup_${arg} || { echo "  ${arg} not supported"; return 1; }
 	done
@@ -800,7 +915,7 @@ trace() {
 
 	for arg do
 		[ "${ns_cmd}" = "" ] && ns_cmd="${arg}" && continue
-		${ns_cmd} tcpdump -s 0 -i "${arg}" -w "${name}_${arg}.pcap" 2> /dev/null &
+		${ns_cmd} tcpdump --immediate-mode -s 0 -i "${arg}" -w "${name}_${arg}.pcap" 2> /dev/null &
 		tcpdump_pids="${tcpdump_pids} $!"
 		ns_cmd=
 	done
@@ -813,14 +928,23 @@ cleanup() {
 	done
 	tcpdump_pids=
 
-	for n in ${NS_A} ${NS_B} ${NS_C} ${NS_R1} ${NS_R2}; do
-		ip netns del ${n} 2> /dev/null
+	for pid in ${nettest_pids}; do
+		kill ${pid}
+	done
+	nettest_pids=
+
+	for pid in ${socat_pids}; do
+		kill "${pid}"
 	done
+	socat_pids=
+
+	cleanup_all_ns
 
 	ip link del veth_A-C			2>/dev/null
 	ip link del veth_A-R1			2>/dev/null
 	ovs-vsctl --if-exists del-port vxlan_a	2>/dev/null
 	ovs-vsctl --if-exists del-br ovs_br0	2>/dev/null
+	rm -f "$tmpoutfile"
 }
 
 mtu() {
@@ -860,15 +984,21 @@ link_get_mtu() {
 route_get_dst_exception() {
 	ns_cmd="${1}"
 	dst="${2}"
+	dsfield="${3}"
 
-	${ns_cmd} ip route get "${dst}"
+	if [ -z "${dsfield}" ]; then
+		dsfield=0
+	fi
+
+	${ns_cmd} ip route get "${dst}" dsfield "${dsfield}"
 }
 
 route_get_dst_pmtu_from_exception() {
 	ns_cmd="${1}"
 	dst="${2}"
+	dsfield="${3}"
 
-	mtu_parse "$(route_get_dst_exception "${ns_cmd}" ${dst})"
+	mtu_parse "$(route_get_dst_exception "${ns_cmd}" "${dst}" "${dsfield}")"
 }
 
 check_pmtu_value() {
@@ -887,7 +1017,7 @@ check_pmtu_value() {
 test_pmtu_ipvX() {
 	family=${1}
 
-	setup namespaces routing || return 2
+	setup namespaces routing || return $ksft_skip
 	trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
 	      "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
 	      "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
@@ -978,6 +1108,95 @@ test_pmtu_ipv6_exception() {
 	test_pmtu_ipvX 6
 }
 
+test_pmtu_ipv4_dscp_icmp_exception() {
+	rt_table=100
+
+	setup namespaces policy_routing || return $ksft_skip
+	trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
+	      "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
+	      "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
+	      "${ns_r2}" veth_R2-B    "${ns_b}"  veth_B-R2
+
+	# Set up initial MTU values
+	mtu "${ns_a}"  veth_A-R1 2000
+	mtu "${ns_r1}" veth_R1-A 2000
+	mtu "${ns_r1}" veth_R1-B 1400
+	mtu "${ns_b}"  veth_B-R1 1400
+
+	mtu "${ns_a}"  veth_A-R2 2000
+	mtu "${ns_r2}" veth_R2-A 2000
+	mtu "${ns_r2}" veth_R2-B 1500
+	mtu "${ns_b}"  veth_B-R2 1500
+
+	len=$((2000 - 20 - 8)) # Fills MTU of veth_A-R1
+
+	dst1="${prefix4}.${b_r1}.1"
+	dst2="${prefix4}.${b_r2}.1"
+
+	# Create route exceptions
+	dsfield=${policy_mark} # No ECN bit set (Not-ECT)
+	run_cmd "${ns_a}" ping -q -M want -Q "${dsfield}" -c 1 -w 1 -s "${len}" "${dst1}"
+
+	dsfield=$(printf "%#x" $((policy_mark + 0x02))) # ECN=2 (ECT(0))
+	run_cmd "${ns_a}" ping -q -M want -Q "${dsfield}" -c 1 -w 1 -s "${len}" "${dst2}"
+
+	# Check that exceptions have been created with the correct PMTU
+	pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" "${policy_mark}")"
+	check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1
+
+	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" "${policy_mark}")"
+	check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1
+}
+
+test_pmtu_ipv4_dscp_udp_exception() {
+	rt_table=100
+
+	if ! which socat > /dev/null 2>&1; then
+		echo "'socat' command not found; skipping tests"
+		return $ksft_skip
+	fi
+
+	setup namespaces policy_routing || return $ksft_skip
+	trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
+	      "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
+	      "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
+	      "${ns_r2}" veth_R2-B    "${ns_b}"  veth_B-R2
+
+	# Set up initial MTU values
+	mtu "${ns_a}"  veth_A-R1 2000
+	mtu "${ns_r1}" veth_R1-A 2000
+	mtu "${ns_r1}" veth_R1-B 1400
+	mtu "${ns_b}"  veth_B-R1 1400
+
+	mtu "${ns_a}"  veth_A-R2 2000
+	mtu "${ns_r2}" veth_R2-A 2000
+	mtu "${ns_r2}" veth_R2-B 1500
+	mtu "${ns_b}"  veth_B-R2 1500
+
+	len=$((2000 - 20 - 8)) # Fills MTU of veth_A-R1
+
+	dst1="${prefix4}.${b_r1}.1"
+	dst2="${prefix4}.${b_r2}.1"
+
+	# Create route exceptions
+	run_cmd_bg "${ns_b}" socat UDP-LISTEN:50000 OPEN:/dev/null,wronly=1
+	socat_pids="${socat_pids} $!"
+
+	dsfield=${policy_mark} # No ECN bit set (Not-ECT)
+	run_cmd "${ns_a}" socat OPEN:/dev/zero,rdonly=1,readbytes="${len}" \
+		UDP:"${dst1}":50000,tos="${dsfield}"
+
+	dsfield=$(printf "%#x" $((policy_mark + 0x02))) # ECN=2 (ECT(0))
+	run_cmd "${ns_a}" socat OPEN:/dev/zero,rdonly=1,readbytes="${len}" \
+		UDP:"${dst2}":50000,tos="${dsfield}"
+
+	# Check that exceptions have been created with the correct PMTU
+	pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" "${policy_mark}")"
+	check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1
+	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" "${policy_mark}")"
+	check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1
+}
+
 test_pmtu_ipvX_over_vxlanY_or_geneveY_exception() {
 	type=${1}
 	family=${2}
@@ -985,11 +1204,11 @@ test_pmtu_ipvX_over_vxlanY_or_geneveY_exception() {
 	ll_mtu=4000
 
 	if [ ${outer_family} -eq 4 ]; then
-		setup namespaces routing ${type}4 || return 2
+		setup namespaces routing ${type}4 || return $ksft_skip
 		#                      IPv4 header   UDP header   VXLAN/GENEVE header   Ethernet header
 		exp_mtu=$((${ll_mtu} - 20          - 8          - 8                   - 14))
 	else
-		setup namespaces routing ${type}6 || return 2
+		setup namespaces routing ${type}6 || return $ksft_skip
 		#                      IPv6 header   UDP header   VXLAN/GENEVE header   Ethernet header
 		exp_mtu=$((${ll_mtu} - 40          - 8          - 8                   - 14))
 	fi
@@ -1060,11 +1279,11 @@ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() {
 	ll_mtu=4000
 
 	if [ ${outer_family} -eq 4 ]; then
-		setup namespaces routing bridge bridged_${type}4 || return 2
+		setup namespaces routing bridge bridged_${type}4 || return $ksft_skip
 		#                      IPv4 header   UDP header   VXLAN/GENEVE header   Ethernet header
 		exp_mtu=$((${ll_mtu} - 20          - 8          - 8                   - 14))
 	else
-		setup namespaces routing bridge bridged_${type}6 || return 2
+		setup namespaces routing bridge bridged_${type}6 || return $ksft_skip
 		#                      IPv6 header   UDP header   VXLAN/GENEVE header   Ethernet header
 		exp_mtu=$((${ll_mtu} - 40          - 8          - 8                   - 14))
 	fi
@@ -1103,6 +1322,41 @@ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() {
 	check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on bridged ${type} interface"
 	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
 	check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on locally bridged ${type} interface"
+
+	tmpoutfile=$(mktemp)
+
+	# Flush Exceptions, retry with TCP
+	run_cmd ${ns_a} ip route flush cached ${dst}
+	run_cmd ${ns_b} ip route flush cached ${dst}
+	run_cmd ${ns_c} ip route flush cached ${dst}
+
+	for target in "${ns_a}" "${ns_c}" ; do
+		if [ ${family} -eq 4 ]; then
+			TCPDST=TCP:${dst}:50000
+		else
+			TCPDST="TCP:[${dst}]:50000"
+		fi
+		${ns_b} socat -T 3 -u -6 TCP-LISTEN:50000,reuseaddr STDOUT > $tmpoutfile &
+		local socat_pid=$!
+
+		wait_local_port_listen ${NS_B} 50000 tcp
+
+		dd if=/dev/zero status=none bs=1M count=1 | ${target} socat -T 3 -u STDIN $TCPDST,connect-timeout=3
+
+		wait ${socat_pid}
+		size=$(du -sb $tmpoutfile)
+		size=${size%%/tmp/*}
+
+		[ $size -ne 1048576 ] && err "File size $size mismatches exepcted value in locally bridged vxlan test" && return 1
+	done
+
+	rm -f "$tmpoutfile"
+
+	# Check that exceptions were created
+	pmtu="$(route_get_dst_pmtu_from_exception "${ns_c}" ${dst})"
+	check_pmtu_value ${exp_mtu} "${pmtu}" "tcp: exceeding link layer MTU on bridged ${type} interface"
+	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
+	check_pmtu_value ${exp_mtu} "${pmtu}" "tcp exceeding link layer MTU on locally bridged ${type} interface"
 }
 
 test_pmtu_ipv4_br_vxlan4_exception() {
@@ -1144,11 +1398,11 @@ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception() {
 	ll_mtu=4000
 
 	if [ ${outer_family} -eq 4 ]; then
-		setup namespaces routing ovs_bridge ovs_${type}4 || return 2
+		setup namespaces routing ovs_bridge ovs_${type}4 || return $ksft_skip
 		#                      IPv4 header   UDP header   VXLAN/GENEVE header   Ethernet header
 		exp_mtu=$((${ll_mtu} - 20          - 8          - 8                   - 14))
 	else
-		setup namespaces routing ovs_bridge ovs_${type}6 || return 2
+		setup namespaces routing ovs_bridge ovs_${type}6 || return $ksft_skip
 		#                      IPv6 header   UDP header   VXLAN/GENEVE header   Ethernet header
 		exp_mtu=$((${ll_mtu} - 40          - 8          - 8                   - 14))
 	fi
@@ -1230,7 +1484,7 @@ test_pmtu_ipvX_over_fouY_or_gueY() {
 	encap=${3}
 	ll_mtu=4000
 
-	setup namespaces routing ${encap}${outer_family}${inner_family} || return 2
+	setup namespaces routing ${encap}${outer_family}${inner_family} || return $ksft_skip
 	trace "${ns_a}" ${encap}_a   "${ns_b}"  ${encap}_b \
 	      "${ns_a}" veth_A-R1    "${ns_r1}" veth_R1-A \
 	      "${ns_b}" veth_B-R1    "${ns_r1}" veth_R1-B
@@ -1309,7 +1563,7 @@ test_pmtu_ipvX_over_ipvY_exception() {
 	outer=${2}
 	ll_mtu=4000
 
-	setup namespaces routing ip${inner}ip${outer} || return 2
+	setup namespaces routing ip${inner}ip${outer} || return $ksft_skip
 
 	trace "${ns_a}" ip_a         "${ns_b}"  ip_b  \
 	      "${ns_a}" veth_A-R1    "${ns_r1}" veth_R1-A \
@@ -1363,7 +1617,7 @@ test_pmtu_ipv6_ipv6_exception() {
 }
 
 test_pmtu_vti4_exception() {
-	setup namespaces veth vti4 xfrm4 || return 2
+	setup namespaces veth vti4 xfrm4 || return $ksft_skip
 	trace "${ns_a}" veth_a    "${ns_b}" veth_b \
 	      "${ns_a}" vti4_a    "${ns_b}" vti4_b
 
@@ -1393,7 +1647,67 @@ test_pmtu_vti4_exception() {
 }
 
 test_pmtu_vti6_exception() {
-	setup namespaces veth vti6 xfrm6 || return 2
+	setup namespaces veth vti6 xfrm6 || return $ksft_skip
+	trace "${ns_a}" veth_a    "${ns_b}" veth_b \
+	      "${ns_a}" vti6_a    "${ns_b}" vti6_b
+	fail=0
+
+	# Create route exception by exceeding link layer MTU
+	mtu "${ns_a}" veth_a 4000
+	mtu "${ns_b}" veth_b 4000
+	mtu "${ns_a}" vti6_a 5000
+	mtu "${ns_b}" vti6_b 5000
+	run_cmd ${ns_a} ${ping6} -q -i 0.1 -w 1 -s 60000 ${tunnel6_b_addr}
+
+	# Check that exception was created
+	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
+	check_pmtu_value any "${pmtu}" "creating tunnel exceeding link layer MTU" || return 1
+
+	# Decrease tunnel MTU, check for PMTU decrease in route exception
+	mtu "${ns_a}" vti6_a 3000
+	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
+	check_pmtu_value "3000" "${pmtu}" "decreasing tunnel MTU" || fail=1
+
+	# Increase tunnel MTU, check for PMTU increase in route exception
+	mtu "${ns_a}" vti6_a 9000
+	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
+	check_pmtu_value "9000" "${pmtu}" "increasing tunnel MTU" || fail=1
+
+	return ${fail}
+}
+
+test_pmtu_vti4_udp_exception() {
+	setup namespaces veth vti4 xfrm4udp || return $ksft_skip
+	trace "${ns_a}" veth_a    "${ns_b}" veth_b \
+	      "${ns_a}" vti4_a    "${ns_b}" vti4_b
+
+	veth_mtu=1500
+	vti_mtu=$((veth_mtu - 20))
+
+	#                                UDP   SPI   SN   IV  ICV   pad length   next header
+	esp_payload_rfc4106=$((vti_mtu - 8   - 4   - 4  - 8 - 16  - 1          - 1))
+	ping_payload=$((esp_payload_rfc4106 - 28))
+
+	mtu "${ns_a}" veth_a ${veth_mtu}
+	mtu "${ns_b}" veth_b ${veth_mtu}
+	mtu "${ns_a}" vti4_a ${vti_mtu}
+	mtu "${ns_b}" vti4_b ${vti_mtu}
+
+	# Send DF packet without exceeding link layer MTU, check that no
+	# exception is created
+	run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr}
+	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
+	check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1
+
+	# Now exceed link layer MTU by one byte, check that exception is created
+	# with the right PMTU value
+	run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload + 1)) ${tunnel4_b_addr}
+	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
+	check_pmtu_value "${esp_payload_rfc4106}" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106 + 1)))"
+}
+
+test_pmtu_vti6_udp_exception() {
+	setup namespaces veth vti6 xfrm6udp || return $ksft_skip
 	trace "${ns_a}" veth_a    "${ns_b}" veth_b \
 	      "${ns_a}" vti6_a    "${ns_b}" vti6_b
 	fail=0
@@ -1422,8 +1736,77 @@ test_pmtu_vti6_exception() {
 	return ${fail}
 }
 
+test_pmtu_vti4_udp_routed_exception() {
+	setup namespaces routing vti4routed xfrm4udprouted || return $ksft_skip
+	trace "${ns_a}" veth_A-R1    "${ns_b}" veth_B-R1 \
+	      "${ns_a}" vti4_a       "${ns_b}" vti4_b
+
+	veth_mtu=1500
+	vti_mtu=$((veth_mtu - 20))
+
+	#                                UDP   SPI   SN   IV  ICV   pad length   next header
+	esp_payload_rfc4106=$((vti_mtu - 8   - 4   - 4  - 8 - 16  - 1          - 1))
+	ping_payload=$((esp_payload_rfc4106 - 28))
+
+        mtu "${ns_a}"  veth_A-R1 ${veth_mtu}
+        mtu "${ns_r1}" veth_R1-A ${veth_mtu}
+        mtu "${ns_b}"  veth_B-R1 ${veth_mtu}
+        mtu "${ns_r1}" veth_R1-B ${veth_mtu}
+
+	mtu "${ns_a}" vti4_a ${vti_mtu}
+	mtu "${ns_b}" vti4_b ${vti_mtu}
+
+	# Send DF packet without exceeding link layer MTU, check that no
+	# exception is created
+	run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr}
+	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
+	check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1
+
+	# Now decrease link layer MTU by 8 bytes on R1, check that exception is created
+	# with the right PMTU value
+        mtu "${ns_r1}" veth_R1-B $((veth_mtu - 8))
+	run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload)) ${tunnel4_b_addr}
+	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
+	check_pmtu_value "$((esp_payload_rfc4106 - 8))" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106)))"
+}
+
+test_pmtu_vti6_udp_routed_exception() {
+	setup namespaces routing vti6routed xfrm6udprouted || return $ksft_skip
+	trace "${ns_a}" veth_A-R1    "${ns_b}" veth_B-R1 \
+	      "${ns_a}" vti6_a       "${ns_b}" vti6_b
+
+	veth_mtu=1500
+	vti_mtu=$((veth_mtu - 40))
+
+	#                                UDP   SPI   SN   IV  ICV   pad length   next header
+	esp_payload_rfc4106=$((vti_mtu - 8   - 4   - 4  - 8 - 16  - 1          - 1))
+	ping_payload=$((esp_payload_rfc4106 - 48))
+
+        mtu "${ns_a}"  veth_A-R1 ${veth_mtu}
+        mtu "${ns_r1}" veth_R1-A ${veth_mtu}
+        mtu "${ns_b}"  veth_B-R1 ${veth_mtu}
+        mtu "${ns_r1}" veth_R1-B ${veth_mtu}
+
+	# mtu "${ns_a}" vti6_a ${vti_mtu}
+	# mtu "${ns_b}" vti6_b ${vti_mtu}
+
+	run_cmd ${ns_a} ${ping6} -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel6_b_addr}
+
+	# Check that exception was not created
+	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
+	check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1
+
+	# Now decrease link layer MTU by 8 bytes on R1, check that exception is created
+	# with the right PMTU value
+        mtu "${ns_r1}" veth_R1-B $((veth_mtu - 8))
+	run_cmd ${ns_a} ${ping6} -q -M want -i 0.1 -w 1 -s $((ping_payload)) ${tunnel6_b_addr}
+	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
+	check_pmtu_value "$((esp_payload_rfc4106 - 8))" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106)))"
+
+}
+
 test_pmtu_vti4_default_mtu() {
-	setup namespaces veth vti4 || return 2
+	setup namespaces veth vti4 || return $ksft_skip
 
 	# Check that MTU of vti device is MTU of veth minus IPv4 header length
 	veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
@@ -1435,7 +1818,7 @@ test_pmtu_vti4_default_mtu() {
 }
 
 test_pmtu_vti6_default_mtu() {
-	setup namespaces veth vti6 || return 2
+	setup namespaces veth vti6 || return $ksft_skip
 
 	# Check that MTU of vti device is MTU of veth minus IPv6 header length
 	veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
@@ -1447,10 +1830,10 @@ test_pmtu_vti6_default_mtu() {
 }
 
 test_pmtu_vti4_link_add_mtu() {
-	setup namespaces || return 2
+	setup namespaces || return $ksft_skip
 
 	run_cmd ${ns_a} ip link add vti4_a type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
-	[ $? -ne 0 ] && err "  vti not supported" && return 2
+	[ $? -ne 0 ] && err "  vti not supported" && return $ksft_skip
 	run_cmd ${ns_a} ip link del vti4_a
 
 	fail=0
@@ -1485,10 +1868,10 @@ test_pmtu_vti4_link_add_mtu() {
 }
 
 test_pmtu_vti6_link_add_mtu() {
-	setup namespaces || return 2
+	setup namespaces || return $ksft_skip
 
 	run_cmd ${ns_a} ip link add vti6_a type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
-	[ $? -ne 0 ] && err "  vti6 not supported" && return 2
+	[ $? -ne 0 ] && err "  vti6 not supported" && return $ksft_skip
 	run_cmd ${ns_a} ip link del vti6_a
 
 	fail=0
@@ -1523,10 +1906,10 @@ test_pmtu_vti6_link_add_mtu() {
 }
 
 test_pmtu_vti6_link_change_mtu() {
-	setup namespaces || return 2
+	setup namespaces || return $ksft_skip
 
 	run_cmd ${ns_a} ip link add dummy0 mtu 1500 type dummy
-	[ $? -ne 0 ] && err "  dummy not supported" && return 2
+	[ $? -ne 0 ] && err "  dummy not supported" && return $ksft_skip
 	run_cmd ${ns_a} ip link add dummy1 mtu 3000 type dummy
 	run_cmd ${ns_a} ip link set dummy0 up
 	run_cmd ${ns_a} ip link set dummy1 up
@@ -1574,15 +1957,22 @@ check_command() {
 	return 0
 }
 
+check_running() {
+	pid=${1}
+	cmd=${2}
+
+	[ "$(cat /proc/${pid}/cmdline 2>/dev/null | tr -d '\0')" = "{cmd}" ]
+}
+
 test_cleanup_vxlanX_exception() {
 	outer="${1}"
 	encap="vxlan"
 	ll_mtu=4000
 
-	check_command taskset || return 2
+	check_command taskset || return $ksft_skip
 	cpu_list=$(grep -m 2 processor /proc/cpuinfo | cut -d ' ' -f 2)
 
-	setup namespaces routing ${encap}${outer} || return 2
+	setup namespaces routing ${encap}${outer} || return $ksft_skip
 	trace "${ns_a}" ${encap}_a   "${ns_b}"  ${encap}_b \
 	      "${ns_a}" veth_A-R1    "${ns_r1}" veth_R1-A \
 	      "${ns_b}" veth_B-R1    "${ns_r1}" veth_R1-B
@@ -1604,11 +1994,12 @@ test_cleanup_vxlanX_exception() {
 
 	${ns_a} ip link del dev veth_A-R1 &
 	iplink_pid=$!
-	sleep 1
-	if [ "$(cat /proc/${iplink_pid}/cmdline 2>/dev/null | tr -d '\0')" = "iplinkdeldevveth_A-R1" ]; then
-		err "  can't delete veth device in a timely manner, PMTU dst likely leaked"
-		return 1
-	fi
+	for i in $(seq 1 20); do
+		check_running ${iplink_pid} "iplinkdeldevveth_A-R1" || return 0
+		sleep 0.1
+	done
+	err "  can't delete veth device in a timely manner, PMTU dst likely leaked"
+	return 1
 }
 
 test_cleanup_ipv6_exception() {
@@ -1626,6 +2017,10 @@ run_test() {
 
 	unset IFS
 
+	# Since cleanup() relies on variables modified by this subshell, it
+	# has to run in this context.
+	trap cleanup EXIT
+
 	if [ "$VERBOSE" = "1" ]; then
 		printf "\n##########################################################################\n\n"
 	fi
@@ -1644,7 +2039,7 @@ run_test() {
 		fi
 		err_flush
 		exit 1
-	elif [ $ret -eq 2 ]; then
+	elif [ $ret -eq $ksft_skip ]; then
 		printf "TEST: %-60s  [SKIP]\n" "${tdesc}"
 		err_flush
 	fi
@@ -1652,7 +2047,19 @@ run_test() {
 	return $ret
 	)
 	ret=$?
-	[ $ret -ne 0 ] && exitcode=1
+	case $ret in
+		0)
+			all_skipped=false
+			[ $exitcode -eq $ksft_skip ] && exitcode=0
+		;;
+		$ksft_skip)
+			[ $all_skipped = true ] && exitcode=$ksft_skip
+		;;
+		*)
+			all_skipped=false
+			exitcode=1
+		;;
+	esac
 
 	return $ret
 }
@@ -1667,7 +2074,7 @@ run_test_nh() {
 }
 
 test_list_flush_ipv4_exception() {
-	setup namespaces routing || return 2
+	setup namespaces routing || return $ksft_skip
 	trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
 	      "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
 	      "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
@@ -1721,7 +2128,7 @@ test_list_flush_ipv4_exception() {
 }
 
 test_list_flush_ipv6_exception() {
-	setup namespaces routing || return 2
+	setup namespaces routing || return $ksft_skip
 	trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
 	      "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
 	      "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
@@ -1770,6 +2177,63 @@ test_list_flush_ipv6_exception() {
 	return ${fail}
 }
 
+test_pmtu_ipvX_route_change() {
+	family=${1}
+
+	setup namespaces routing || return 2
+	trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
+	      "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
+	      "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
+	      "${ns_r2}" veth_R2-B    "${ns_b}"  veth_B-R2
+
+	if [ ${family} -eq 4 ]; then
+		ping=ping
+		dst1="${prefix4}.${b_r1}.1"
+		dst2="${prefix4}.${b_r2}.1"
+		gw="${prefix4}.${a_r1}.2"
+	else
+		ping=${ping6}
+		dst1="${prefix6}:${b_r1}::1"
+		dst2="${prefix6}:${b_r2}::1"
+		gw="${prefix6}:${a_r1}::2"
+	fi
+
+	# Set up initial MTU values
+	mtu "${ns_a}"  veth_A-R1 2000
+	mtu "${ns_r1}" veth_R1-A 2000
+	mtu "${ns_r1}" veth_R1-B 1400
+	mtu "${ns_b}"  veth_B-R1 1400
+
+	mtu "${ns_a}"  veth_A-R2 2000
+	mtu "${ns_r2}" veth_R2-A 2000
+	mtu "${ns_r2}" veth_R2-B 1500
+	mtu "${ns_b}"  veth_B-R2 1500
+
+	# Create route exceptions
+	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst1}
+	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst2}
+
+	# Check that exceptions have been created with the correct PMTU
+	pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
+	check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1
+	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
+	check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1
+
+	# Replace the route from A to R1
+	run_cmd ${ns_a} ip route change default via ${gw}
+
+	# Delete the device in A
+	run_cmd ${ns_a} ip link del "veth_A-R1"
+}
+
+test_pmtu_ipv4_route_change() {
+	test_pmtu_ipvX_route_change 4
+}
+
+test_pmtu_ipv6_route_change() {
+	test_pmtu_ipvX_route_change 6
+}
+
 usage() {
 	echo
 	echo "$0 [OPTIONS] [TEST]..."
@@ -1786,6 +2250,7 @@ usage() {
 #
 exitcode=0
 desc=0
+all_skipped=true
 
 while getopts :ptv o
 do
@@ -1840,7 +2305,7 @@ for t in ${tests}; do
 	if [ $run_this -eq 1 ]; then
 		run_test "${name}" "${desc}"
 		# if test was skipped no need to retry with nexthop objects
-		[ $? -eq 2 ] && rerun_nh=0
+		[ $? -eq $ksft_skip ] && rerun_nh=0
 
 		if [ "${rerun_nh}" = "1" ]; then
 			run_test_nh "${name}" "${desc}"
diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c
index 2c522f7a0aec..1a736f700be4 100644
--- a/tools/testing/selftests/net/psock_fanout.c
+++ b/tools/testing/selftests/net/psock_fanout.c
@@ -53,15 +53,19 @@
 #include <unistd.h>
 
 #include "psock_lib.h"
+#include "../kselftest.h"
 
 #define RING_NUM_FRAMES			20
 
+static uint32_t cfg_max_num_members;
+
 /* Open a socket in a given fanout mode.
  * @return -1 if mode is bad, a valid socket otherwise */
 static int sock_fanout_open(uint16_t typeflags, uint16_t group_id)
 {
 	struct sockaddr_ll addr = {0};
-	int fd, val;
+	struct fanout_args args;
+	int fd, val, err;
 
 	fd = socket(PF_PACKET, SOCK_RAW, 0);
 	if (fd < 0) {
@@ -83,8 +87,18 @@ static int sock_fanout_open(uint16_t typeflags, uint16_t group_id)
 		exit(1);
 	}
 
-	val = (((int) typeflags) << 16) | group_id;
-	if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val))) {
+	if (cfg_max_num_members) {
+		args.id = group_id;
+		args.type_flags = typeflags;
+		args.max_num_members = cfg_max_num_members;
+		err = setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &args,
+				 sizeof(args));
+	} else {
+		val = (((int) typeflags) << 16) | group_id;
+		err = setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val,
+				 sizeof(val));
+	}
+	if (err) {
 		if (close(fd)) {
 			perror("close packet");
 			exit(1);
@@ -98,13 +112,13 @@ static int sock_fanout_open(uint16_t typeflags, uint16_t group_id)
 static void sock_fanout_set_cbpf(int fd)
 {
 	struct sock_filter bpf_filter[] = {
-		BPF_STMT(BPF_LD+BPF_B+BPF_ABS, 80),	      /* ldb [80] */
-		BPF_STMT(BPF_RET+BPF_A, 0),		      /* ret A */
+		BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 80),	      /* ldb [80] */
+		BPF_STMT(BPF_RET | BPF_A, 0),		      /* ret A */
 	};
 	struct sock_fprog bpf_prog;
 
 	bpf_prog.filter = bpf_filter;
-	bpf_prog.len = sizeof(bpf_filter) / sizeof(struct sock_filter);
+	bpf_prog.len = ARRAY_SIZE(bpf_filter);
 
 	if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT_DATA, &bpf_prog,
 		       sizeof(bpf_prog))) {
@@ -149,7 +163,7 @@ static void sock_fanout_set_ebpf(int fd)
 	memset(&attr, 0, sizeof(attr));
 	attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
 	attr.insns = (unsigned long) prog;
-	attr.insn_cnt = sizeof(prog) / sizeof(prog[0]);
+	attr.insn_cnt = ARRAY_SIZE(prog);
 	attr.license = (unsigned long) "GPL";
 	attr.log_buf = (unsigned long) log_buf,
 	attr.log_size = sizeof(log_buf),
@@ -286,6 +300,56 @@ static void test_control_group(void)
 	}
 }
 
+/* Test illegal max_num_members values */
+static void test_control_group_max_num_members(void)
+{
+	int fds[3];
+
+	fprintf(stderr, "test: control multiple sockets, max_num_members\n");
+
+	/* expected failure on greater than PACKET_FANOUT_MAX */
+	cfg_max_num_members = (1 << 16) + 1;
+	if (sock_fanout_open(PACKET_FANOUT_HASH, 0) != -1) {
+		fprintf(stderr, "ERROR: max_num_members > PACKET_FANOUT_MAX\n");
+		exit(1);
+	}
+
+	cfg_max_num_members = 256;
+	fds[0] = sock_fanout_open(PACKET_FANOUT_HASH, 0);
+	if (fds[0] == -1) {
+		fprintf(stderr, "ERROR: failed open\n");
+		exit(1);
+	}
+
+	/* expected failure on joining group with different max_num_members */
+	cfg_max_num_members = 257;
+	if (sock_fanout_open(PACKET_FANOUT_HASH, 0) != -1) {
+		fprintf(stderr, "ERROR: set different max_num_members\n");
+		exit(1);
+	}
+
+	/* success on joining group with same max_num_members */
+	cfg_max_num_members = 256;
+	fds[1] = sock_fanout_open(PACKET_FANOUT_HASH, 0);
+	if (fds[1] == -1) {
+		fprintf(stderr, "ERROR: failed to join group\n");
+		exit(1);
+	}
+
+	/* success on joining group with max_num_members unspecified */
+	cfg_max_num_members = 0;
+	fds[2] = sock_fanout_open(PACKET_FANOUT_HASH, 0);
+	if (fds[2] == -1) {
+		fprintf(stderr, "ERROR: failed to join group\n");
+		exit(1);
+	}
+
+	if (close(fds[2]) || close(fds[1]) || close(fds[0])) {
+		fprintf(stderr, "ERROR: closing sockets\n");
+		exit(1);
+	}
+}
+
 /* Test creating a unique fanout group ids */
 static void test_unique_fanout_group_ids(void)
 {
@@ -426,8 +490,11 @@ int main(int argc, char **argv)
 
 	test_control_single();
 	test_control_group();
+	test_control_group_max_num_members();
 	test_unique_fanout_group_ids();
 
+	/* PACKET_FANOUT_MAX */
+	cfg_max_num_members = 1 << 16;
 	/* find a set of ports that do not collide onto the same socket */
 	ret = test_datapath(PACKET_FANOUT_HASH, port_off,
 			    expect_hash[0], expect_hash[1]);
diff --git a/tools/testing/selftests/net/psock_lib.h b/tools/testing/selftests/net/psock_lib.h
index faa884385c45..6e4fef560873 100644
--- a/tools/testing/selftests/net/psock_lib.h
+++ b/tools/testing/selftests/net/psock_lib.h
@@ -14,6 +14,8 @@
 #include <arpa/inet.h>
 #include <unistd.h>
 
+#include "kselftest.h"
+
 #define DATA_LEN			100
 #define DATA_CHAR			'a'
 #define DATA_CHAR_1			'b'
@@ -63,7 +65,7 @@ static __maybe_unused void pair_udp_setfilter(int fd)
 	struct sock_fprog bpf_prog;
 
 	bpf_prog.filter = bpf_filter;
-	bpf_prog.len = sizeof(bpf_filter) / sizeof(struct sock_filter);
+	bpf_prog.len = ARRAY_SIZE(bpf_filter);
 
 	if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf_prog,
 		       sizeof(bpf_prog))) {
diff --git a/tools/testing/selftests/net/psock_snd.c b/tools/testing/selftests/net/psock_snd.c
index 7d15e10a9fb6..edf1e6f80d41 100644
--- a/tools/testing/selftests/net/psock_snd.c
+++ b/tools/testing/selftests/net/psock_snd.c
@@ -389,6 +389,8 @@ int main(int argc, char **argv)
 		error(1, errno, "ip link set mtu");
 	if (system("ip addr add dev lo 172.17.0.1/24"))
 		error(1, errno, "ip addr add");
+	if (system("sysctl -w net.ipv4.conf.lo.accept_local=1"))
+		error(1, errno, "sysctl lo.accept_local");
 
 	run_test();
 
diff --git a/tools/testing/selftests/net/psock_snd.sh b/tools/testing/selftests/net/psock_snd.sh
index 170be65e0816..1cbfeb5052ec 100755
--- a/tools/testing/selftests/net/psock_snd.sh
+++ b/tools/testing/selftests/net/psock_snd.sh
@@ -86,9 +86,6 @@ echo "raw truncate hlen - 1 (expected to fail: EINVAL)"
 echo "raw gso min size"
 ./in_netns.sh ./psock_snd -v -c -g -l "${mss_exceeds}"
 
-echo "raw gso min size - 1 (expected to fail)"
-(! ./in_netns.sh ./psock_snd -v -c -g -l "${mss}")
-
 echo "raw gso max size"
 ./in_netns.sh ./psock_snd -v -c -g -l "${max_mss}"
 
diff --git a/tools/testing/selftests/net/reuseaddr_ports_exhausted.c b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c
index 7b01b7c2ec10..066efd30e294 100644
--- a/tools/testing/selftests/net/reuseaddr_ports_exhausted.c
+++ b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c
@@ -30,25 +30,25 @@ struct reuse_opts {
 };
 
 struct reuse_opts unreusable_opts[12] = {
-	{0, 0, 0, 0},
-	{0, 0, 0, 1},
-	{0, 0, 1, 0},
-	{0, 0, 1, 1},
-	{0, 1, 0, 0},
-	{0, 1, 0, 1},
-	{0, 1, 1, 0},
-	{0, 1, 1, 1},
-	{1, 0, 0, 0},
-	{1, 0, 0, 1},
-	{1, 0, 1, 0},
-	{1, 0, 1, 1},
+	{{0, 0}, {0, 0}},
+	{{0, 0}, {0, 1}},
+	{{0, 0}, {1, 0}},
+	{{0, 0}, {1, 1}},
+	{{0, 1}, {0, 0}},
+	{{0, 1}, {0, 1}},
+	{{0, 1}, {1, 0}},
+	{{0, 1}, {1, 1}},
+	{{1, 0}, {0, 0}},
+	{{1, 0}, {0, 1}},
+	{{1, 0}, {1, 0}},
+	{{1, 0}, {1, 1}},
 };
 
 struct reuse_opts reusable_opts[4] = {
-	{1, 1, 0, 0},
-	{1, 1, 0, 1},
-	{1, 1, 1, 0},
-	{1, 1, 1, 1},
+	{{1, 1}, {0, 0}},
+	{{1, 1}, {0, 1}},
+	{{1, 1}, {1, 0}},
+	{{1, 1}, {1, 1}},
 };
 
 int bind_port(struct __test_metadata *_metadata, int reuseaddr, int reuseport)
diff --git a/tools/testing/selftests/net/reuseport_bpf.c b/tools/testing/selftests/net/reuseport_bpf.c
index b5277106df1f..65aea27d761c 100644
--- a/tools/testing/selftests/net/reuseport_bpf.c
+++ b/tools/testing/selftests/net/reuseport_bpf.c
@@ -24,9 +24,7 @@
 #include <sys/resource.h>
 #include <unistd.h>
 
-#ifndef ARRAY_SIZE
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
-#endif
+#include "../kselftest.h"
 
 struct test_params {
 	int recv_family;
@@ -330,7 +328,7 @@ static void test_extra_filter(const struct test_params p)
 	if (bind(fd1, addr, sockaddr_size()))
 		error(1, errno, "failed to bind recv socket 1");
 
-	if (!bind(fd2, addr, sockaddr_size()) && errno != EADDRINUSE)
+	if (!bind(fd2, addr, sockaddr_size()) || errno != EADDRINUSE)
 		error(1, errno, "bind socket 2 should fail with EADDRINUSE");
 
 	free(addr);
diff --git a/tools/testing/selftests/net/reuseport_bpf_numa.c b/tools/testing/selftests/net/reuseport_bpf_numa.c
index c9f478b40996..c9ba36aa688e 100644
--- a/tools/testing/selftests/net/reuseport_bpf_numa.c
+++ b/tools/testing/selftests/net/reuseport_bpf_numa.c
@@ -86,7 +86,7 @@ static void attach_bpf(int fd)
 
 	memset(&attr, 0, sizeof(attr));
 	attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
-	attr.insn_cnt = sizeof(prog) / sizeof(prog[0]);
+	attr.insn_cnt = ARRAY_SIZE(prog);
 	attr.insns = (unsigned long) &prog;
 	attr.license = (unsigned long) &bpf_license;
 	attr.log_buf = (unsigned long) &bpf_log_buf;
@@ -211,12 +211,16 @@ static void test(int *rcv_fd, int len, int family, int proto)
 
 	/* Forward iterate */
 	for (node = 0; node < len; ++node) {
+		if (!numa_bitmask_isbitset(numa_nodes_ptr, node))
+			continue;
 		send_from_node(node, family, proto);
 		receive_on_node(rcv_fd, len, epfd, node, proto);
 	}
 
 	/* Reverse iterate */
 	for (node = len - 1; node >= 0; --node) {
+		if (!numa_bitmask_isbitset(numa_nodes_ptr, node))
+			continue;
 		send_from_node(node, family, proto);
 		receive_on_node(rcv_fd, len, epfd, node, proto);
 	}
diff --git a/tools/testing/selftests/net/route_localnet.sh b/tools/testing/selftests/net/route_localnet.sh
index 116bfeab72fa..e08701c750e3 100755
--- a/tools/testing/selftests/net/route_localnet.sh
+++ b/tools/testing/selftests/net/route_localnet.sh
@@ -18,8 +18,10 @@ setup() {
     ip route del 127.0.0.0/8 dev lo table local
     ip netns exec "${PEER_NS}" ip route del 127.0.0.0/8 dev lo table local
 
-    ifconfig veth0 127.25.3.4/24 up
-    ip netns exec "${PEER_NS}" ifconfig veth1 127.25.3.14/24 up
+    ip address add 127.25.3.4/24 dev veth0
+    ip link set dev veth0 up
+    ip netns exec "${PEER_NS}" ip address add 127.25.3.14/24 dev veth1
+    ip netns exec "${PEER_NS}" ip link set dev veth1 up
 
     ip route flush cache
     ip netns exec "${PEER_NS}" ip route flush cache
diff --git a/tools/testing/selftests/net/rps_default_mask.sh b/tools/testing/selftests/net/rps_default_mask.sh
new file mode 100755
index 000000000000..4287a8529890
--- /dev/null
+++ b/tools/testing/selftests/net/rps_default_mask.sh
@@ -0,0 +1,79 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+readonly ksft_skip=4
+readonly cpus=$(nproc)
+ret=0
+
+[ $cpus -gt 2 ] || exit $ksft_skip
+
+readonly INITIAL_RPS_DEFAULT_MASK=$(cat /proc/sys/net/core/rps_default_mask)
+readonly TAG="$(mktemp -u XXXXXX)"
+readonly VETH="veth${TAG}"
+readonly NETNS="ns-${TAG}"
+
+setup() {
+	ip netns add "${NETNS}"
+	ip -netns "${NETNS}" link set lo up
+}
+
+cleanup() {
+	echo $INITIAL_RPS_DEFAULT_MASK > /proc/sys/net/core/rps_default_mask
+	ip netns del $NETNS
+}
+
+chk_rps() {
+	local rps_mask expected_rps_mask=$4
+	local dev_name=$3
+	local netns=$2
+	local cmd="cat"
+	local msg=$1
+
+	[ -n "$netns" ] && cmd="ip netns exec $netns $cmd"
+
+	rps_mask=$($cmd /sys/class/net/$dev_name/queues/rx-0/rps_cpus)
+	printf "%-60s" "$msg"
+
+	# In case there is more than 32 CPUs we need to remove commas from masks
+	rps_mask=${rps_mask//,}
+	expected_rps_mask=${expected_rps_mask//,}
+	if [ $rps_mask -eq $expected_rps_mask ]; then
+		echo "[ ok ]"
+	else
+		echo "[fail] expected $expected_rps_mask found $rps_mask"
+		ret=1
+	fi
+}
+
+trap cleanup EXIT
+
+echo 0 > /proc/sys/net/core/rps_default_mask
+setup
+chk_rps "empty rps_default_mask" $NETNS lo 0
+cleanup
+
+echo 1 > /proc/sys/net/core/rps_default_mask
+setup
+chk_rps "changing rps_default_mask dont affect existing devices" "" lo $INITIAL_RPS_DEFAULT_MASK
+
+echo 3 > /proc/sys/net/core/rps_default_mask
+chk_rps "changing rps_default_mask dont affect existing netns" $NETNS lo 0
+
+ip link add name $VETH type veth peer netns $NETNS name $VETH
+ip link set dev $VETH up
+ip -n $NETNS link set dev $VETH up
+chk_rps "changing rps_default_mask affect newly created devices" "" $VETH 3
+chk_rps "changing rps_default_mask don't affect newly child netns[II]" $NETNS $VETH 0
+ip link del dev $VETH
+ip netns del $NETNS
+
+setup
+chk_rps "rps_default_mask is 0 by default in child netns" "$NETNS" lo 0
+
+ip netns exec $NETNS sysctl -qw net.core.rps_default_mask=1
+ip link add name $VETH type veth peer netns $NETNS name $VETH
+chk_rps "changing rps_default_mask in child ns don't affect the main one" "" lo $INITIAL_RPS_DEFAULT_MASK
+chk_rps "changing rps_default_mask in child ns affects new childns devices" $NETNS $VETH 1
+chk_rps "changing rps_default_mask in child ns don't affect existing devices" $NETNS lo 0
+
+exit $ret
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index c9ce3dfa42ee..bdf6f10d0558 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -4,10 +4,39 @@
 #
 # set -e
 
+ALL_TESTS="
+	kci_test_polrouting
+	kci_test_route_get
+	kci_test_addrlft
+	kci_test_promote_secondaries
+	kci_test_tc
+	kci_test_gre
+	kci_test_gretap
+	kci_test_ip6gretap
+	kci_test_erspan
+	kci_test_ip6erspan
+	kci_test_bridge
+	kci_test_addrlabel
+	kci_test_ifalias
+	kci_test_vrf
+	kci_test_encap
+	kci_test_macsec
+	kci_test_macsec_offload
+	kci_test_ipsec
+	kci_test_ipsec_offload
+	kci_test_fdb_get
+	kci_test_neigh_get
+	kci_test_bridge_parent_id
+	kci_test_address_proto
+	kci_test_enslave_bonding
+"
+
 devdummy="test-dummy0"
+VERBOSE=0
+PAUSE=no
+PAUSE_ON_FAIL=no
 
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
+source lib.sh
 
 # set global exit status, but never reset nonzero one.
 check_err()
@@ -25,35 +54,102 @@ check_fail()
 	fi
 }
 
+run_cmd_common()
+{
+	local cmd="$*"
+	local out
+	if [ "$VERBOSE" = "1" ]; then
+		echo "COMMAND: ${cmd}"
+	fi
+	out=$($cmd 2>&1)
+	rc=$?
+	if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+		echo "    $out"
+	fi
+	return $rc
+}
+
+run_cmd() {
+	run_cmd_common "$@"
+	rc=$?
+	check_err $rc
+	return $rc
+}
+run_cmd_fail()
+{
+	run_cmd_common "$@"
+	rc=$?
+	check_fail $rc
+	return $rc
+}
+
+run_cmd_grep_common()
+{
+	local find="$1"; shift
+	local cmd="$*"
+	local out
+	if [ "$VERBOSE" = "1" ]; then
+		echo "COMMAND: ${cmd} 2>&1 | grep -q '${find}'"
+	fi
+	out=$($cmd 2>&1 | grep -q "${find}" 2>&1)
+	return $?
+}
+
+run_cmd_grep() {
+	run_cmd_grep_common "$@"
+	rc=$?
+	check_err $rc
+	return $rc
+}
+
+run_cmd_grep_fail()
+{
+	run_cmd_grep_common "$@"
+	rc=$?
+	check_fail $rc
+	return $rc
+}
+
+end_test()
+{
+	echo "$*"
+	[ "${VERBOSE}" = "1" ] && echo
+
+	if [[ $ret -ne 0 ]] && [[ "${PAUSE_ON_FAIL}" = "yes" ]]; then
+		echo "Hit enter to continue"
+		read a
+	fi;
+
+	if [ "${PAUSE}" = "yes" ]; then
+		echo "Hit enter to continue"
+		read a
+	fi
+
+}
+
+
 kci_add_dummy()
 {
-	ip link add name "$devdummy" type dummy
-	check_err $?
-	ip link set "$devdummy" up
-	check_err $?
+	run_cmd ip link add name "$devdummy" type dummy
+	run_cmd ip link set "$devdummy" up
 }
 
 kci_del_dummy()
 {
-	ip link del dev "$devdummy"
-	check_err $?
+	run_cmd ip link del dev "$devdummy"
 }
 
 kci_test_netconf()
 {
 	dev="$1"
 	r=$ret
-
-	ip netconf show dev "$dev" > /dev/null
-	check_err $?
-
+	run_cmd ip netconf show dev "$dev"
 	for f in 4 6; do
-		ip -$f netconf show dev "$dev" > /dev/null
-		check_err $?
+		run_cmd ip -$f netconf show dev "$dev"
 	done
 
 	if [ $ret -ne 0 ] ;then
-		echo "FAIL: ip netconf show $dev"
+		end_test "FAIL: ip netconf show $dev"
 		test $r -eq 0 && ret=0
 		return 1
 	fi
@@ -66,43 +162,27 @@ kci_test_bridge()
 	vlandev="testbr-vlan1"
 
 	local ret=0
-	ip link add name "$devbr" type bridge
-	check_err $?
-
-	ip link set dev "$devdummy" master "$devbr"
-	check_err $?
-
-	ip link set "$devbr" up
-	check_err $?
-
-	ip link add link "$devbr" name "$vlandev" type vlan id 1
-	check_err $?
-	ip addr add dev "$vlandev" 10.200.7.23/30
-	check_err $?
-	ip -6 addr add dev "$vlandev" dead:42::1234/64
-	check_err $?
-	ip -d link > /dev/null
-	check_err $?
-	ip r s t all > /dev/null
-	check_err $?
+	run_cmd ip link add name "$devbr" type bridge
+	run_cmd ip link set dev "$devdummy" master "$devbr"
+	run_cmd ip link set "$devbr" up
+	run_cmd ip link add link "$devbr" name "$vlandev" type vlan id 1
+	run_cmd ip addr add dev "$vlandev" 10.200.7.23/30
+	run_cmd ip -6 addr add dev "$vlandev" dead:42::1234/64
+	run_cmd ip -d link
+	run_cmd ip r s t all
 
 	for name in "$devbr" "$vlandev" "$devdummy" ; do
 		kci_test_netconf "$name"
 	done
-
-	ip -6 addr del dev "$vlandev" dead:42::1234/64
-	check_err $?
-
-	ip link del dev "$vlandev"
-	check_err $?
-	ip link del dev "$devbr"
-	check_err $?
+	run_cmd ip -6 addr del dev "$vlandev" dead:42::1234/64
+	run_cmd ip link del dev "$vlandev"
+	run_cmd ip link del dev "$devbr"
 
 	if [ $ret -ne 0 ];then
-		echo "FAIL: bridge setup"
+		end_test "FAIL: bridge setup"
 		return 1
 	fi
-	echo "PASS: bridge setup"
+	end_test "PASS: bridge setup"
 
 }
 
@@ -113,34 +193,23 @@ kci_test_gre()
 	loc=10.0.0.1
 
 	local ret=0
-	ip tunnel add $gredev mode gre remote $rem local $loc ttl 1
-	check_err $?
-	ip link set $gredev up
-	check_err $?
-	ip addr add 10.23.7.10 dev $gredev
-	check_err $?
-	ip route add 10.23.8.0/30 dev $gredev
-	check_err $?
-	ip addr add dev "$devdummy" 10.23.7.11/24
-	check_err $?
-	ip link > /dev/null
-	check_err $?
-	ip addr > /dev/null
-	check_err $?
+	run_cmd ip tunnel add $gredev mode gre remote $rem local $loc ttl 1
+	run_cmd ip link set $gredev up
+	run_cmd ip addr add 10.23.7.10 dev $gredev
+	run_cmd ip route add 10.23.8.0/30 dev $gredev
+	run_cmd ip addr add dev "$devdummy" 10.23.7.11/24
+	run_cmd ip link
+	run_cmd ip addr
 
 	kci_test_netconf "$gredev"
-
-	ip addr del dev "$devdummy" 10.23.7.11/24
-	check_err $?
-
-	ip link del $gredev
-	check_err $?
+	run_cmd ip addr del dev "$devdummy" 10.23.7.11/24
+	run_cmd ip link del $gredev
 
 	if [ $ret -ne 0 ];then
-		echo "FAIL: gre tunnel endpoint"
+		end_test "FAIL: gre tunnel endpoint"
 		return 1
 	fi
-	echo "PASS: gre tunnel endpoint"
+	end_test "PASS: gre tunnel endpoint"
 }
 
 # tc uses rtnetlink too, for full tc testing
@@ -150,56 +219,40 @@ kci_test_tc()
 	dev=lo
 	local ret=0
 
-	tc qdisc add dev "$dev" root handle 1: htb
-	check_err $?
-	tc class add dev "$dev" parent 1: classid 1:10 htb rate 1mbit
-	check_err $?
-	tc filter add dev "$dev" parent 1:0 prio 5 handle ffe: protocol ip u32 divisor 256
-	check_err $?
-	tc filter add dev "$dev" parent 1:0 prio 5 handle ffd: protocol ip u32 divisor 256
-	check_err $?
-	tc filter add dev "$dev" parent 1:0 prio 5 handle ffc: protocol ip u32 divisor 256
-	check_err $?
-	tc filter add dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:3 u32 ht ffe:2: match ip src 10.0.0.3 flowid 1:10
-	check_err $?
-	tc filter add dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:2 u32 ht ffe:2: match ip src 10.0.0.2 flowid 1:10
-	check_err $?
-	tc filter show dev "$dev" parent  1:0 > /dev/null
-	check_err $?
-	tc filter del dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:3 u32
-	check_err $?
-	tc filter show dev "$dev" parent  1:0 > /dev/null
-	check_err $?
-	tc qdisc del dev "$dev" root handle 1: htb
-	check_err $?
+	run_cmd tc qdisc add dev "$dev" root handle 1: htb
+	run_cmd tc class add dev "$dev" parent 1: classid 1:10 htb rate 1mbit
+	run_cmd tc filter add dev "$dev" parent 1:0 prio 5 handle ffe: protocol ip u32 divisor 256
+	run_cmd tc filter add dev "$dev" parent 1:0 prio 5 handle ffd: protocol ip u32 divisor 256
+	run_cmd tc filter add dev "$dev" parent 1:0 prio 5 handle ffc: protocol ip u32 divisor 256
+	run_cmd tc filter add dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:3 u32 ht ffe:2: match ip src 10.0.0.3 flowid 1:10
+	run_cmd tc filter add dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:2 u32 ht ffe:2: match ip src 10.0.0.2 flowid 1:10
+	run_cmd tc filter show dev "$dev" parent  1:0
+	run_cmd tc filter del dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:3 u32
+	run_cmd tc filter show dev "$dev" parent  1:0
+	run_cmd tc qdisc del dev "$dev" root handle 1: htb
 
 	if [ $ret -ne 0 ];then
-		echo "FAIL: tc htb hierarchy"
+		end_test "FAIL: tc htb hierarchy"
 		return 1
 	fi
-	echo "PASS: tc htb hierarchy"
+	end_test "PASS: tc htb hierarchy"
 
 }
 
 kci_test_polrouting()
 {
 	local ret=0
-	ip rule add fwmark 1 lookup 100
-	check_err $?
-	ip route add local 0.0.0.0/0 dev lo table 100
-	check_err $?
-	ip r s t all > /dev/null
-	check_err $?
-	ip rule del fwmark 1 lookup 100
-	check_err $?
-	ip route del local 0.0.0.0/0 dev lo table 100
-	check_err $?
+	run_cmd ip rule add fwmark 1 lookup 100
+	run_cmd ip route add local 0.0.0.0/0 dev lo table 100
+	run_cmd ip r s t all
+	run_cmd ip rule del fwmark 1 lookup 100
+	run_cmd ip route del local 0.0.0.0/0 dev lo table 100
 
 	if [ $ret -ne 0 ];then
-		echo "FAIL: policy route test"
+		end_test "FAIL: policy route test"
 		return 1
 	fi
-	echo "PASS: policy routing"
+	end_test "PASS: policy routing"
 }
 
 kci_test_route_get()
@@ -207,65 +260,51 @@ kci_test_route_get()
 	local hash_policy=$(sysctl -n net.ipv4.fib_multipath_hash_policy)
 
 	local ret=0
-
-	ip route get 127.0.0.1 > /dev/null
-	check_err $?
-	ip route get 127.0.0.1 dev "$devdummy" > /dev/null
-	check_err $?
-	ip route get ::1 > /dev/null
-	check_err $?
-	ip route get fe80::1 dev "$devdummy" > /dev/null
-	check_err $?
-	ip route get 127.0.0.1 from 127.0.0.1 oif lo tos 0x1 mark 0x1 > /dev/null
-	check_err $?
-	ip route get ::1 from ::1 iif lo oif lo tos 0x1 mark 0x1 > /dev/null
-	check_err $?
-	ip addr add dev "$devdummy" 10.23.7.11/24
-	check_err $?
-	ip route get 10.23.7.11 from 10.23.7.12 iif "$devdummy" > /dev/null
-	check_err $?
-	ip route add 10.23.8.0/24 \
+	run_cmd ip route get 127.0.0.1
+	run_cmd ip route get 127.0.0.1 dev "$devdummy"
+	run_cmd ip route get ::1
+	run_cmd ip route get fe80::1 dev "$devdummy"
+	run_cmd ip route get 127.0.0.1 from 127.0.0.1 oif lo tos 0x10 mark 0x1
+	run_cmd ip route get ::1 from ::1 iif lo oif lo tos 0x10 mark 0x1
+	run_cmd ip addr add dev "$devdummy" 10.23.7.11/24
+	run_cmd ip route get 10.23.7.11 from 10.23.7.12 iif "$devdummy"
+	run_cmd ip route add 10.23.8.0/24 \
 		nexthop via 10.23.7.13 dev "$devdummy" \
 		nexthop via 10.23.7.14 dev "$devdummy"
-	check_err $?
+
 	sysctl -wq net.ipv4.fib_multipath_hash_policy=0
-	ip route get 10.23.8.11 > /dev/null
-	check_err $?
+	run_cmd ip route get 10.23.8.11
 	sysctl -wq net.ipv4.fib_multipath_hash_policy=1
-	ip route get 10.23.8.11 > /dev/null
-	check_err $?
+	run_cmd ip route get 10.23.8.11
 	sysctl -wq net.ipv4.fib_multipath_hash_policy="$hash_policy"
-	ip route del 10.23.8.0/24
-	check_err $?
-	ip addr del dev "$devdummy" 10.23.7.11/24
-	check_err $?
+	run_cmd ip route del 10.23.8.0/24
+	run_cmd ip addr del dev "$devdummy" 10.23.7.11/24
+
 
 	if [ $ret -ne 0 ];then
-		echo "FAIL: route get"
+		end_test "FAIL: route get"
 		return 1
 	fi
 
-	echo "PASS: route get"
+	end_test "PASS: route get"
 }
 
 kci_test_addrlft()
 {
 	for i in $(seq 10 100) ;do
 		lft=$(((RANDOM%3) + 1))
-		ip addr add 10.23.11.$i/32 dev "$devdummy" preferred_lft $lft valid_lft $((lft+1))
-		check_err $?
+		run_cmd ip addr add 10.23.11.$i/32 dev "$devdummy" preferred_lft $lft valid_lft $((lft+1))
 	done
 
 	sleep 5
-
-	ip addr show dev "$devdummy" | grep "10.23.11."
+	run_cmd_grep_fail "10.23.11." ip addr show dev "$devdummy"
 	if [ $? -eq 0 ]; then
-		echo "FAIL: preferred_lft addresses remaining"
 		check_err 1
+		end_test "FAIL: preferred_lft addresses remaining"
 		return
 	fi
 
-	echo "PASS: preferred_lft addresses have expired"
+	end_test "PASS: preferred_lft addresses have expired"
 }
 
 kci_test_promote_secondaries()
@@ -284,27 +323,17 @@ kci_test_promote_secondaries()
 
 	[ $promote -eq 0 ] && sysctl -q net.ipv4.conf.$devdummy.promote_secondaries=0
 
-	echo "PASS: promote_secondaries complete"
+	end_test "PASS: promote_secondaries complete"
 }
 
 kci_test_addrlabel()
 {
 	local ret=0
-
-	ip addrlabel add prefix dead::/64 dev lo label 1
-	check_err $?
-
-	ip addrlabel list |grep -q "prefix dead::/64 dev lo label 1"
-	check_err $?
-
-	ip addrlabel del prefix dead::/64 dev lo label 1 2> /dev/null
-	check_err $?
-
-	ip addrlabel add prefix dead::/64 label 1 2> /dev/null
-	check_err $?
-
-	ip addrlabel del prefix dead::/64 label 1 2> /dev/null
-	check_err $?
+	run_cmd ip addrlabel add prefix dead::/64 dev lo label 1
+	run_cmd_grep "prefix dead::/64 dev lo label 1" ip addrlabel list
+	run_cmd ip addrlabel del prefix dead::/64 dev lo label 1
+	run_cmd ip addrlabel add prefix dead::/64 label 1
+	run_cmd ip addrlabel del prefix dead::/64 label 1
 
 	# concurrent add/delete
 	for i in $(seq 1 1000); do
@@ -320,11 +349,11 @@ kci_test_addrlabel()
 	ip addrlabel del prefix 1c3::/64 label 12345 2>/dev/null
 
 	if [ $ret -ne 0 ];then
-		echo "FAIL: ipv6 addrlabel"
+		end_test "FAIL: ipv6 addrlabel"
 		return 1
 	fi
 
-	echo "PASS: ipv6 addrlabel"
+	end_test "PASS: ipv6 addrlabel"
 }
 
 kci_test_ifalias()
@@ -332,35 +361,28 @@ kci_test_ifalias()
 	local ret=0
 	namewant=$(uuidgen)
 	syspathname="/sys/class/net/$devdummy/ifalias"
-
-	ip link set dev "$devdummy" alias "$namewant"
-	check_err $?
+	run_cmd ip link set dev "$devdummy" alias "$namewant"
 
 	if [ $ret -ne 0 ]; then
-		echo "FAIL: cannot set interface alias of $devdummy to $namewant"
+		end_test "FAIL: cannot set interface alias of $devdummy to $namewant"
 		return 1
 	fi
-
-	ip link show "$devdummy" | grep -q "alias $namewant"
-	check_err $?
+	run_cmd_grep "alias $namewant" ip link show "$devdummy"
 
 	if [ -r "$syspathname" ] ; then
 		read namehave < "$syspathname"
 		if [ "$namewant" != "$namehave" ]; then
-			echo "FAIL: did set ifalias $namewant but got $namehave"
+			end_test "FAIL: did set ifalias $namewant but got $namehave"
 			return 1
 		fi
 
 		namewant=$(uuidgen)
 		echo "$namewant" > "$syspathname"
-	        ip link show "$devdummy" | grep -q "alias $namewant"
-		check_err $?
+	        run_cmd_grep "alias $namewant" ip link show "$devdummy"
 
 		# sysfs interface allows to delete alias again
 		echo "" > "$syspathname"
-
-	        ip link show "$devdummy" | grep -q "alias $namewant"
-		check_fail $?
+	        run_cmd_grep_fail "alias $namewant" ip link show "$devdummy"
 
 		for i in $(seq 1 100); do
 			uuidgen > "$syspathname" &
@@ -369,57 +391,48 @@ kci_test_ifalias()
 		wait
 
 		# re-add the alias -- kernel should free mem when dummy dev is removed
-		ip link set dev "$devdummy" alias "$namewant"
-		check_err $?
+		run_cmd ip link set dev "$devdummy" alias "$namewant"
+
 	fi
 
 	if [ $ret -ne 0 ]; then
-		echo "FAIL: set interface alias $devdummy to $namewant"
+		end_test "FAIL: set interface alias $devdummy to $namewant"
 		return 1
 	fi
 
-	echo "PASS: set ifalias $namewant for $devdummy"
+	end_test "PASS: set ifalias $namewant for $devdummy"
 }
 
 kci_test_vrf()
 {
 	vrfname="test-vrf"
 	local ret=0
-
-	ip link show type vrf 2>/dev/null
+	run_cmd ip link show type vrf
 	if [ $? -ne 0 ]; then
-		echo "SKIP: vrf: iproute2 too old"
+		end_test "SKIP: vrf: iproute2 too old"
 		return $ksft_skip
 	fi
-
-	ip link add "$vrfname" type vrf table 10
-	check_err $?
+	run_cmd ip link add "$vrfname" type vrf table 10
 	if [ $ret -ne 0 ];then
-		echo "FAIL: can't add vrf interface, skipping test"
+		end_test "FAIL: can't add vrf interface, skipping test"
 		return 0
 	fi
-
-	ip -br link show type vrf | grep -q "$vrfname"
-	check_err $?
+	run_cmd_grep "$vrfname" ip -br link show type vrf
 	if [ $ret -ne 0 ];then
-		echo "FAIL: created vrf device not found"
+		end_test "FAIL: created vrf device not found"
 		return 1
 	fi
 
-	ip link set dev "$vrfname" up
-	check_err $?
-
-	ip link set dev "$devdummy" master "$vrfname"
-	check_err $?
-	ip link del dev "$vrfname"
-	check_err $?
+	run_cmd ip link set dev "$vrfname" up
+	run_cmd ip link set dev "$devdummy" master "$vrfname"
+	run_cmd ip link del dev "$vrfname"
 
 	if [ $ret -ne 0 ];then
-		echo "FAIL: vrf"
+		end_test "FAIL: vrf"
 		return 1
 	fi
 
-	echo "PASS: vrf"
+	end_test "PASS: vrf"
 }
 
 kci_test_encap_vxlan()
@@ -427,151 +440,92 @@ kci_test_encap_vxlan()
 	local ret=0
 	vxlan="test-vxlan0"
 	vlan="test-vlan0"
-	testns="$1"
-
-	ip -netns "$testns" link add "$vxlan" type vxlan id 42 group 239.1.1.1 \
-		dev "$devdummy" dstport 4789 2>/dev/null
+	run_cmd ip -netns "$testns" link add "$vxlan" type vxlan id 42 group 239.1.1.1 \
+		dev "$devdummy" dstport 4789
 	if [ $? -ne 0 ]; then
-		echo "FAIL: can't add vxlan interface, skipping test"
+		end_test "FAIL: can't add vxlan interface, skipping test"
 		return 0
 	fi
-	check_err $?
-
-	ip -netns "$testns" addr add 10.2.11.49/24 dev "$vxlan"
-	check_err $?
 
-	ip -netns "$testns" link set up dev "$vxlan"
-	check_err $?
-
-	ip -netns "$testns" link add link "$vxlan" name "$vlan" type vlan id 1
-	check_err $?
+	run_cmd ip -netns "$testns" addr add 10.2.11.49/24 dev "$vxlan"
+	run_cmd ip -netns "$testns" link set up dev "$vxlan"
+	run_cmd ip -netns "$testns" link add link "$vxlan" name "$vlan" type vlan id 1
 
 	# changelink testcases
-	ip -netns "$testns" link set dev "$vxlan" type vxlan vni 43 2>/dev/null
-	check_fail $?
-
-	ip -netns "$testns" link set dev "$vxlan" type vxlan group ffe5::5 dev "$devdummy" 2>/dev/null
-	check_fail $?
-
-	ip -netns "$testns" link set dev "$vxlan" type vxlan ttl inherit 2>/dev/null
-	check_fail $?
-
-	ip -netns "$testns" link set dev "$vxlan" type vxlan ttl 64
-	check_err $?
-
-	ip -netns "$testns" link set dev "$vxlan" type vxlan nolearning
-	check_err $?
-
-	ip -netns "$testns" link set dev "$vxlan" type vxlan proxy 2>/dev/null
-	check_fail $?
-
-	ip -netns "$testns" link set dev "$vxlan" type vxlan norsc 2>/dev/null
-	check_fail $?
-
-	ip -netns "$testns" link set dev "$vxlan" type vxlan l2miss 2>/dev/null
-	check_fail $?
-
-	ip -netns "$testns" link set dev "$vxlan" type vxlan l3miss 2>/dev/null
-	check_fail $?
-
-	ip -netns "$testns" link set dev "$vxlan" type vxlan external 2>/dev/null
-	check_fail $?
-
-	ip -netns "$testns" link set dev "$vxlan" type vxlan udpcsum 2>/dev/null
-	check_fail $?
-
-	ip -netns "$testns" link set dev "$vxlan" type vxlan udp6zerocsumtx 2>/dev/null
-	check_fail $?
-
-	ip -netns "$testns" link set dev "$vxlan" type vxlan udp6zerocsumrx 2>/dev/null
-	check_fail $?
-
-	ip -netns "$testns" link set dev "$vxlan" type vxlan remcsumtx 2>/dev/null
-	check_fail $?
-
-	ip -netns "$testns" link set dev "$vxlan" type vxlan remcsumrx 2>/dev/null
-	check_fail $?
-
-	ip -netns "$testns" link set dev "$vxlan" type vxlan gbp 2>/dev/null
-	check_fail $?
-
-	ip -netns "$testns" link set dev "$vxlan" type vxlan gpe 2>/dev/null
-	check_fail $?
-
-	ip -netns "$testns" link del "$vxlan"
-	check_err $?
+	run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan vni 43
+	run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan group ffe5::5 dev "$devdummy"
+	run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan ttl inherit
+
+	run_cmd ip -netns "$testns" link set dev "$vxlan" type vxlan ttl 64
+	run_cmd ip -netns "$testns" link set dev "$vxlan" type vxlan nolearning
+
+	run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan proxy
+	run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan norsc
+	run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan l2miss
+	run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan l3miss
+	run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan external
+	run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan udpcsum
+	run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan udp6zerocsumtx
+	run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan udp6zerocsumrx
+	run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan remcsumtx
+	run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan remcsumrx
+	run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan gbp
+	run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan gpe
+	run_cmd ip -netns "$testns" link del "$vxlan"
 
 	if [ $ret -ne 0 ]; then
-		echo "FAIL: vxlan"
+		end_test "FAIL: vxlan"
 		return 1
 	fi
-	echo "PASS: vxlan"
+	end_test "PASS: vxlan"
 }
 
 kci_test_encap_fou()
 {
 	local ret=0
 	name="test-fou"
-	testns="$1"
-
-	ip fou help 2>&1 |grep -q 'Usage: ip fou'
+	run_cmd_grep 'Usage: ip fou' ip fou help
 	if [ $? -ne 0 ];then
-		echo "SKIP: fou: iproute2 too old"
+		end_test "SKIP: fou: iproute2 too old"
 		return $ksft_skip
 	fi
 
 	if ! /sbin/modprobe -q -n fou; then
-		echo "SKIP: module fou is not found"
+		end_test "SKIP: module fou is not found"
 		return $ksft_skip
 	fi
 	/sbin/modprobe -q fou
-	ip -netns "$testns" fou add port 7777 ipproto 47 2>/dev/null
+
+	run_cmd ip -netns "$testns" fou add port 7777 ipproto 47
 	if [ $? -ne 0 ];then
-		echo "FAIL: can't add fou port 7777, skipping test"
+		end_test "FAIL: can't add fou port 7777, skipping test"
 		return 1
 	fi
-
-	ip -netns "$testns" fou add port 8888 ipproto 4
-	check_err $?
-
-	ip -netns "$testns" fou del port 9999 2>/dev/null
-	check_fail $?
-
-	ip -netns "$testns" fou del port 7777
-	check_err $?
-
+	run_cmd ip -netns "$testns" fou add port 8888 ipproto 4
+	run_cmd_fail ip -netns "$testns" fou del port 9999
+	run_cmd ip -netns "$testns" fou del port 7777
 	if [ $ret -ne 0 ]; then
-		echo "FAIL: fou"
+		end_test "FAIL: fou"s
 		return 1
 	fi
 
-	echo "PASS: fou"
+	end_test "PASS: fou"
 }
 
 # test various encap methods, use netns to avoid unwanted interference
 kci_test_encap()
 {
-	testns="testns"
 	local ret=0
-
-	ip netns add "$testns"
+	setup_ns testns
 	if [ $? -ne 0 ]; then
-		echo "SKIP encap tests: cannot add net namespace $testns"
+		end_test "SKIP encap tests: cannot add net namespace $testns"
 		return $ksft_skip
 	fi
-
-	ip -netns "$testns" link set lo up
-	check_err $?
-
-	ip -netns "$testns" link add name "$devdummy" type dummy
-	check_err $?
-	ip -netns "$testns" link set "$devdummy" up
-	check_err $?
-
-	kci_test_encap_vxlan "$testns"
-	check_err $?
-	kci_test_encap_fou "$testns"
-	check_err $?
+	run_cmd ip -netns "$testns" link set lo up
+	run_cmd ip -netns "$testns" link add name "$devdummy" type dummy
+	run_cmd ip -netns "$testns" link set "$devdummy" up
+	run_cmd kci_test_encap_vxlan
+	run_cmd kci_test_encap_fou
 
 	ip netns del "$testns"
 	return $ret
@@ -581,41 +535,95 @@ kci_test_macsec()
 {
 	msname="test_macsec0"
 	local ret=0
-
-	ip macsec help 2>&1 | grep -q "^Usage: ip macsec"
+	run_cmd_grep "^Usage: ip macsec" ip macsec help
 	if [ $? -ne 0 ]; then
-		echo "SKIP: macsec: iproute2 too old"
+		end_test "SKIP: macsec: iproute2 too old"
 		return $ksft_skip
 	fi
+	run_cmd ip link add link "$devdummy" "$msname" type macsec port 42 encrypt on
+	if [ $ret -ne 0 ];then
+		end_test "FAIL: can't add macsec interface, skipping test"
+		return 1
+	fi
+	run_cmd ip macsec add "$msname" tx sa 0 pn 1024 on key 01 12345678901234567890123456789012
+	run_cmd ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef"
+	run_cmd ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef" sa 0 pn 1 on key 00 0123456789abcdef0123456789abcdef
+	run_cmd ip macsec show
+	run_cmd ip link del dev "$msname"
 
-	ip link add link "$devdummy" "$msname" type macsec port 42 encrypt on
-	check_err $?
 	if [ $ret -ne 0 ];then
-		echo "FAIL: can't add macsec interface, skipping test"
+		end_test "FAIL: macsec"
 		return 1
 	fi
 
-	ip macsec add "$msname" tx sa 0 pn 1024 on key 01 12345678901234567890123456789012
-	check_err $?
+	end_test "PASS: macsec"
+}
 
-	ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef"
-	check_err $?
+kci_test_macsec_offload()
+{
+	sysfsd=/sys/kernel/debug/netdevsim/netdevsim0/ports/0/
+	sysfsnet=/sys/bus/netdevsim/devices/netdevsim0/net/
+	probed=false
+	local ret=0
+	run_cmd_grep "^Usage: ip macsec" ip macsec help
+	if [ $? -ne 0 ]; then
+		end_test "SKIP: macsec: iproute2 too old"
+		return $ksft_skip
+	fi
 
-	ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef" sa 0 pn 1 on key 00 0123456789abcdef0123456789abcdef
-	check_err $?
+	if ! mount | grep -q debugfs; then
+		mount -t debugfs none /sys/kernel/debug/ &> /dev/null
+	fi
 
-	ip macsec show > /dev/null
-	check_err $?
+	# setup netdevsim since dummydev doesn't have offload support
+	if [ ! -w /sys/bus/netdevsim/new_device ] ; then
+		run_cmd modprobe -q netdevsim
 
-	ip link del dev "$msname"
-	check_err $?
+		if [ $ret -ne 0 ]; then
+			end_test "SKIP: macsec_offload can't load netdevsim"
+			return $ksft_skip
+		fi
+		probed=true
+	fi
 
-	if [ $ret -ne 0 ];then
-		echo "FAIL: macsec"
+	echo "0" > /sys/bus/netdevsim/new_device
+	while [ ! -d $sysfsnet ] ; do :; done
+	udevadm settle
+	dev=`ls $sysfsnet`
+
+	ip link set $dev up
+	if [ ! -d $sysfsd ] ; then
+		end_test "FAIL: macsec_offload can't create device $dev"
+		return 1
+	fi
+	run_cmd_grep 'macsec-hw-offload: on' ethtool -k $dev
+	if [ $? -eq 1 ] ; then
+		end_test "FAIL: macsec_offload netdevsim doesn't support MACsec offload"
 		return 1
 	fi
+	run_cmd ip link add link $dev kci_macsec1 type macsec port 4 offload mac
+	run_cmd ip link add link $dev kci_macsec2 type macsec address "aa:bb:cc:dd:ee:ff" port 5 offload mac
+	run_cmd ip link add link $dev kci_macsec3 type macsec sci abbacdde01020304 offload mac
+	run_cmd_fail ip link add link $dev kci_macsec4 type macsec port 8 offload mac
+
+	msname=kci_macsec1
+	run_cmd ip macsec add "$msname" tx sa 0 pn 1024 on key 01 12345678901234567890123456789012
+	run_cmd ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef"
+	run_cmd ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef" sa 0 pn 1 on \
+		key 00 0123456789abcdef0123456789abcdef
+	run_cmd_fail ip macsec add "$msname" rx port 1235 address "1c:ed:de:ad:be:ef"
+	# clean up any leftovers
+	for msdev in kci_macsec{1,2,3,4} ; do
+	    ip link del $msdev 2> /dev/null
+	done
+	echo 0 > /sys/bus/netdevsim/del_device
+	$probed && rmmod netdevsim
 
-	echo "PASS: macsec"
+	if [ $ret -ne 0 ]; then
+		end_test "FAIL: macsec_offload"
+		return 1
+	fi
+	end_test "PASS: macsec_offload"
 }
 
 #-------------------------------------------------------------------
@@ -647,8 +655,7 @@ kci_test_ipsec()
 	ip addr add $srcip dev $devdummy
 
 	# flush to be sure there's nothing configured
-	ip x s flush ; ip x p flush
-	check_err $?
+	run_cmd ip x s flush ; ip x p flush
 
 	# start the monitor in the background
 	tmpfile=`mktemp /var/run/ipsectestXXX`
@@ -656,72 +663,57 @@ kci_test_ipsec()
 	sleep 0.2
 
 	ipsecid="proto esp src $srcip dst $dstip spi 0x07"
-	ip x s add $ipsecid \
+	run_cmd ip x s add $ipsecid \
             mode transport reqid 0x07 replay-window 32 \
             $algo sel src $srcip/24 dst $dstip/24
-	check_err $?
 
-	lines=`ip x s list | grep $srcip | grep $dstip | wc -l`
-	test $lines -eq 2
-	check_err $?
 
-	ip x s count | grep -q "SAD count 1"
-	check_err $?
+	lines=`ip x s list | grep $srcip | grep $dstip | wc -l`
+	run_cmd test $lines -eq 2
+	run_cmd_grep "SAD count 1" ip x s count
 
 	lines=`ip x s get $ipsecid | grep $srcip | grep $dstip | wc -l`
-	test $lines -eq 2
-	check_err $?
-
-	ip x s delete $ipsecid
-	check_err $?
+	run_cmd test $lines -eq 2
+	run_cmd ip x s delete $ipsecid
 
 	lines=`ip x s list | wc -l`
-	test $lines -eq 0
-	check_err $?
+	run_cmd test $lines -eq 0
 
 	ipsecsel="dir out src $srcip/24 dst $dstip/24"
-	ip x p add $ipsecsel \
+	run_cmd ip x p add $ipsecsel \
 		    tmpl proto esp src $srcip dst $dstip \
 		    spi 0x07 mode transport reqid 0x07
-	check_err $?
+
 
 	lines=`ip x p list | grep $srcip | grep $dstip | wc -l`
-	test $lines -eq 2
-	check_err $?
+	run_cmd test $lines -eq 2
 
-	ip x p count | grep -q "SPD IN  0 OUT 1 FWD 0"
-	check_err $?
+	run_cmd_grep "SPD IN  0 OUT 1 FWD 0" ip x p count
 
 	lines=`ip x p get $ipsecsel | grep $srcip | grep $dstip | wc -l`
-	test $lines -eq 2
-	check_err $?
+	run_cmd test $lines -eq 2
 
-	ip x p delete $ipsecsel
-	check_err $?
+	run_cmd ip x p delete $ipsecsel
 
 	lines=`ip x p list | wc -l`
-	test $lines -eq 0
-	check_err $?
+	run_cmd test $lines -eq 0
 
 	# check the monitor results
 	kill $mpid
 	lines=`wc -l $tmpfile | cut "-d " -f1`
-	test $lines -eq 20
-	check_err $?
+	run_cmd test $lines -eq 20
 	rm -rf $tmpfile
 
 	# clean up any leftovers
-	ip x s flush
-	check_err $?
-	ip x p flush
-	check_err $?
+	run_cmd ip x s flush
+	run_cmd ip x p flush
 	ip addr del $srcip/32 dev $devdummy
 
 	if [ $ret -ne 0 ]; then
-		echo "FAIL: ipsec"
+		end_test "FAIL: ipsec"
 		return 1
 	fi
-	echo "PASS: ipsec"
+	end_test "PASS: ipsec"
 }
 
 #-------------------------------------------------------------------
@@ -747,12 +739,15 @@ kci_test_ipsec_offload()
 	sysfsnet=/sys/bus/netdevsim/devices/netdevsim0/net/
 	probed=false
 
+	if ! mount | grep -q debugfs; then
+		mount -t debugfs none /sys/kernel/debug/ &> /dev/null
+	fi
+
 	# setup netdevsim since dummydev doesn't have offload support
 	if [ ! -w /sys/bus/netdevsim/new_device ] ; then
-		modprobe -q netdevsim
-		check_err $?
+		run_cmd modprobe -q netdevsim
 		if [ $ret -ne 0 ]; then
-			echo "SKIP: ipsec_offload can't load netdevsim"
+			end_test "SKIP: ipsec_offload can't load netdevsim"
 			return $ksft_skip
 		fi
 		probed=true
@@ -766,11 +761,11 @@ kci_test_ipsec_offload()
 	ip addr add $srcip dev $dev
 	ip link set $dev up
 	if [ ! -d $sysfsd ] ; then
-		echo "FAIL: ipsec_offload can't create device $dev"
+		end_test "FAIL: ipsec_offload can't create device $dev"
 		return 1
 	fi
 	if [ ! -f $sysfsf ] ; then
-		echo "FAIL: ipsec_offload netdevsim doesn't support IPsec offload"
+		end_test "FAIL: ipsec_offload netdevsim doesn't support IPsec offload"
 		return 1
 	fi
 
@@ -778,40 +773,41 @@ kci_test_ipsec_offload()
 	ip x s flush ; ip x p flush
 
 	# create offloaded SAs, both in and out
-	ip x p add dir out src $srcip/24 dst $dstip/24 \
+	run_cmd ip x p add dir out src $srcip/24 dst $dstip/24 \
 	    tmpl proto esp src $srcip dst $dstip spi 9 \
 	    mode transport reqid 42
-	check_err $?
-	ip x p add dir out src $dstip/24 dst $srcip/24 \
+
+	run_cmd ip x p add dir in src $dstip/24 dst $srcip/24 \
 	    tmpl proto esp src $dstip dst $srcip spi 9 \
 	    mode transport reqid 42
-	check_err $?
 
-	ip x s add proto esp src $srcip dst $dstip spi 9 \
+	run_cmd ip x s add proto esp src $srcip dst $dstip spi 9 \
 	    mode transport reqid 42 $algo sel src $srcip/24 dst $dstip/24 \
 	    offload dev $dev dir out
-	check_err $?
-	ip x s add proto esp src $dstip dst $srcip spi 9 \
+
+	run_cmd ip x s add proto esp src $dstip dst $srcip spi 9 \
 	    mode transport reqid 42 $algo sel src $dstip/24 dst $srcip/24 \
 	    offload dev $dev dir in
-	check_err $?
+
 	if [ $ret -ne 0 ]; then
-		echo "FAIL: ipsec_offload can't create SA"
+		end_test "FAIL: ipsec_offload can't create SA"
 		return 1
 	fi
 
 	# does offload show up in ip output
 	lines=`ip x s list | grep -c "crypto offload parameters: dev $dev dir"`
 	if [ $lines -ne 2 ] ; then
-		echo "FAIL: ipsec_offload SA offload missing from list output"
 		check_err 1
+		end_test "FAIL: ipsec_offload SA offload missing from list output"
 	fi
 
+	# we didn't create a peer, make sure we can Tx
+	ip neigh add $dstip dev $dev lladdr 00:11:22:33:44:55
 	# use ping to exercise the Tx path
 	ping -I $dev -c 3 -W 1 -i 0 $dstip >/dev/null
 
 	# does driver have correct offload info
-	diff $sysfsf - << EOF
+	run_cmd diff $sysfsf - << EOF
 SA count=2 tx=3
 sa[0] tx ipaddr=0x00000000 00000000 00000000 00000000
 sa[0]    spi=0x00000009 proto=0x32 salt=0x61626364 crypt=1
@@ -821,7 +817,7 @@ sa[1]    spi=0x00000009 proto=0x32 salt=0x61626364 crypt=1
 sa[1]    key=0x34333231 38373635 32313039 36353433
 EOF
 	if [ $? -ne 0 ] ; then
-		echo "FAIL: ipsec_offload incorrect driver data"
+		end_test "FAIL: ipsec_offload incorrect driver data"
 		check_err 1
 	fi
 
@@ -830,255 +826,205 @@ EOF
 	ip x p flush
 	lines=`grep -c "SA count=0" $sysfsf`
 	if [ $lines -ne 1 ] ; then
-		echo "FAIL: ipsec_offload SA not removed from driver"
 		check_err 1
+		end_test "FAIL: ipsec_offload SA not removed from driver"
 	fi
 
 	# clean up any leftovers
+	echo 0 > /sys/bus/netdevsim/del_device
 	$probed && rmmod netdevsim
 
 	if [ $ret -ne 0 ]; then
-		echo "FAIL: ipsec_offload"
+		end_test "FAIL: ipsec_offload"
 		return 1
 	fi
-	echo "PASS: ipsec_offload"
+	end_test "PASS: ipsec_offload"
 }
 
 kci_test_gretap()
 {
-	testns="testns"
 	DEV_NS=gretap00
 	local ret=0
 
-	ip netns add "$testns"
+	setup_ns testns
 	if [ $? -ne 0 ]; then
-		echo "SKIP gretap tests: cannot add net namespace $testns"
+		end_test "SKIP gretap tests: cannot add net namespace $testns"
 		return $ksft_skip
 	fi
 
-	ip link help gretap 2>&1 | grep -q "^Usage:"
+	run_cmd_grep "^Usage:" ip link help gretap
 	if [ $? -ne 0 ];then
-		echo "SKIP: gretap: iproute2 too old"
+		end_test "SKIP: gretap: iproute2 too old"
 		ip netns del "$testns"
 		return $ksft_skip
 	fi
 
 	# test native tunnel
-	ip -netns "$testns" link add dev "$DEV_NS" type gretap seq \
+	run_cmd ip -netns "$testns" link add dev "$DEV_NS" type gretap seq \
 		key 102 local 172.16.1.100 remote 172.16.1.200
-	check_err $?
-
-	ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24
-	check_err $?
 
-	ip -netns "$testns" link set dev $DEV_NS up
-	check_err $?
 
-	ip -netns "$testns" link del "$DEV_NS"
-	check_err $?
+	run_cmd ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24
+	run_cmd ip -netns "$testns" link set dev $DEV_NS up
+	run_cmd ip -netns "$testns" link del "$DEV_NS"
 
 	# test external mode
-	ip -netns "$testns" link add dev "$DEV_NS" type gretap external
-	check_err $?
-
-	ip -netns "$testns" link del "$DEV_NS"
-	check_err $?
+	run_cmd ip -netns "$testns" link add dev "$DEV_NS" type gretap external
+	run_cmd ip -netns "$testns" link del "$DEV_NS"
 
 	if [ $ret -ne 0 ]; then
-		echo "FAIL: gretap"
+		end_test "FAIL: gretap"
 		ip netns del "$testns"
 		return 1
 	fi
-	echo "PASS: gretap"
+	end_test "PASS: gretap"
 
 	ip netns del "$testns"
 }
 
 kci_test_ip6gretap()
 {
-	testns="testns"
 	DEV_NS=ip6gretap00
 	local ret=0
 
-	ip netns add "$testns"
+	setup_ns testns
 	if [ $? -ne 0 ]; then
-		echo "SKIP ip6gretap tests: cannot add net namespace $testns"
+		end_test "SKIP ip6gretap tests: cannot add net namespace $testns"
 		return $ksft_skip
 	fi
 
-	ip link help ip6gretap 2>&1 | grep -q "^Usage:"
+	run_cmd_grep "^Usage:" ip link help ip6gretap
 	if [ $? -ne 0 ];then
-		echo "SKIP: ip6gretap: iproute2 too old"
+		end_test "SKIP: ip6gretap: iproute2 too old"
 		ip netns del "$testns"
 		return $ksft_skip
 	fi
 
 	# test native tunnel
-	ip -netns "$testns" link add dev "$DEV_NS" type ip6gretap seq \
+	run_cmd ip -netns "$testns" link add dev "$DEV_NS" type ip6gretap seq \
 		key 102 local fc00:100::1 remote fc00:100::2
-	check_err $?
-
-	ip -netns "$testns" addr add dev "$DEV_NS" fc00:200::1/96
-	check_err $?
 
-	ip -netns "$testns" link set dev $DEV_NS up
-	check_err $?
 
-	ip -netns "$testns" link del "$DEV_NS"
-	check_err $?
+	run_cmd ip -netns "$testns" addr add dev "$DEV_NS" fc00:200::1/96
+	run_cmd ip -netns "$testns" link set dev $DEV_NS up
+	run_cmd ip -netns "$testns" link del "$DEV_NS"
 
 	# test external mode
-	ip -netns "$testns" link add dev "$DEV_NS" type ip6gretap external
-	check_err $?
-
-	ip -netns "$testns" link del "$DEV_NS"
-	check_err $?
+	run_cmd ip -netns "$testns" link add dev "$DEV_NS" type ip6gretap external
+	run_cmd ip -netns "$testns" link del "$DEV_NS"
 
 	if [ $ret -ne 0 ]; then
-		echo "FAIL: ip6gretap"
+		end_test "FAIL: ip6gretap"
 		ip netns del "$testns"
 		return 1
 	fi
-	echo "PASS: ip6gretap"
+	end_test "PASS: ip6gretap"
 
 	ip netns del "$testns"
 }
 
 kci_test_erspan()
 {
-	testns="testns"
 	DEV_NS=erspan00
 	local ret=0
-
-	ip link help erspan 2>&1 | grep -q "^Usage:"
+	run_cmd_grep "^Usage:" ip link help erspan
 	if [ $? -ne 0 ];then
-		echo "SKIP: erspan: iproute2 too old"
+		end_test "SKIP: erspan: iproute2 too old"
 		return $ksft_skip
 	fi
-
-	ip netns add "$testns"
+	setup_ns testns
 	if [ $? -ne 0 ]; then
-		echo "SKIP erspan tests: cannot add net namespace $testns"
+		end_test "SKIP erspan tests: cannot add net namespace $testns"
 		return $ksft_skip
 	fi
 
 	# test native tunnel erspan v1
-	ip -netns "$testns" link add dev "$DEV_NS" type erspan seq \
+	run_cmd ip -netns "$testns" link add dev "$DEV_NS" type erspan seq \
 		key 102 local 172.16.1.100 remote 172.16.1.200 \
 		erspan_ver 1 erspan 488
-	check_err $?
 
-	ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24
-	check_err $?
 
-	ip -netns "$testns" link set dev $DEV_NS up
-	check_err $?
-
-	ip -netns "$testns" link del "$DEV_NS"
-	check_err $?
+	run_cmd ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24
+	run_cmd ip -netns "$testns" link set dev $DEV_NS up
+	run_cmd ip -netns "$testns" link del "$DEV_NS"
 
 	# test native tunnel erspan v2
-	ip -netns "$testns" link add dev "$DEV_NS" type erspan seq \
+	run_cmd ip -netns "$testns" link add dev "$DEV_NS" type erspan seq \
 		key 102 local 172.16.1.100 remote 172.16.1.200 \
 		erspan_ver 2 erspan_dir ingress erspan_hwid 7
-	check_err $?
 
-	ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24
-	check_err $?
-
-	ip -netns "$testns" link set dev $DEV_NS up
-	check_err $?
 
-	ip -netns "$testns" link del "$DEV_NS"
-	check_err $?
+	run_cmd ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24
+	run_cmd ip -netns "$testns" link set dev $DEV_NS up
+	run_cmd ip -netns "$testns" link del "$DEV_NS"
 
 	# test external mode
-	ip -netns "$testns" link add dev "$DEV_NS" type erspan external
-	check_err $?
-
-	ip -netns "$testns" link del "$DEV_NS"
-	check_err $?
+	run_cmd ip -netns "$testns" link add dev "$DEV_NS" type erspan external
+	run_cmd ip -netns "$testns" link del "$DEV_NS"
 
 	if [ $ret -ne 0 ]; then
-		echo "FAIL: erspan"
+		end_test "FAIL: erspan"
 		ip netns del "$testns"
 		return 1
 	fi
-	echo "PASS: erspan"
+	end_test "PASS: erspan"
 
 	ip netns del "$testns"
 }
 
 kci_test_ip6erspan()
 {
-	testns="testns"
 	DEV_NS=ip6erspan00
 	local ret=0
-
-	ip link help ip6erspan 2>&1 | grep -q "^Usage:"
+	run_cmd_grep "^Usage:" ip link help ip6erspan
 	if [ $? -ne 0 ];then
-		echo "SKIP: ip6erspan: iproute2 too old"
+		end_test "SKIP: ip6erspan: iproute2 too old"
 		return $ksft_skip
 	fi
-
-	ip netns add "$testns"
+	setup_ns testns
 	if [ $? -ne 0 ]; then
-		echo "SKIP ip6erspan tests: cannot add net namespace $testns"
+		end_test "SKIP ip6erspan tests: cannot add net namespace $testns"
 		return $ksft_skip
 	fi
 
 	# test native tunnel ip6erspan v1
-	ip -netns "$testns" link add dev "$DEV_NS" type ip6erspan seq \
+	run_cmd ip -netns "$testns" link add dev "$DEV_NS" type ip6erspan seq \
 		key 102 local fc00:100::1 remote fc00:100::2 \
 		erspan_ver 1 erspan 488
-	check_err $?
 
-	ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24
-	check_err $?
 
-	ip -netns "$testns" link set dev $DEV_NS up
-	check_err $?
-
-	ip -netns "$testns" link del "$DEV_NS"
-	check_err $?
+	run_cmd ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24
+	run_cmd ip -netns "$testns" link set dev $DEV_NS up
+	run_cmd ip -netns "$testns" link del "$DEV_NS"
 
 	# test native tunnel ip6erspan v2
-	ip -netns "$testns" link add dev "$DEV_NS" type ip6erspan seq \
+	run_cmd ip -netns "$testns" link add dev "$DEV_NS" type ip6erspan seq \
 		key 102 local fc00:100::1 remote fc00:100::2 \
 		erspan_ver 2 erspan_dir ingress erspan_hwid 7
-	check_err $?
-
-	ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24
-	check_err $?
 
-	ip -netns "$testns" link set dev $DEV_NS up
-	check_err $?
 
-	ip -netns "$testns" link del "$DEV_NS"
-	check_err $?
+	run_cmd ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24
+	run_cmd ip -netns "$testns" link set dev $DEV_NS up
+	run_cmd ip -netns "$testns" link del "$DEV_NS"
 
 	# test external mode
-	ip -netns "$testns" link add dev "$DEV_NS" \
+	run_cmd ip -netns "$testns" link add dev "$DEV_NS" \
 		type ip6erspan external
-	check_err $?
 
-	ip -netns "$testns" link del "$DEV_NS"
-	check_err $?
+	run_cmd ip -netns "$testns" link del "$DEV_NS"
 
 	if [ $ret -ne 0 ]; then
-		echo "FAIL: ip6erspan"
+		end_test "FAIL: ip6erspan"
 		ip netns del "$testns"
 		return 1
 	fi
-	echo "PASS: ip6erspan"
+	end_test "PASS: ip6erspan"
 
 	ip netns del "$testns"
 }
 
 kci_test_fdb_get()
 {
-	IP="ip -netns testns"
-	BRIDGE="bridge -netns testns"
 	brdev="test-br0"
 	vxlandev="vxlan10"
 	test_mac=de:ad:be:ef:13:37
@@ -1086,45 +1032,37 @@ kci_test_fdb_get()
 	dstip="10.0.2.3"
 	local ret=0
 
-	bridge fdb help 2>&1 |grep -q 'bridge fdb get'
+	run_cmd_grep 'bridge fdb get' bridge fdb help
 	if [ $? -ne 0 ];then
-		echo "SKIP: fdb get tests: iproute2 too old"
+		end_test "SKIP: fdb get tests: iproute2 too old"
 		return $ksft_skip
 	fi
 
-	ip netns add testns
+	setup_ns testns
 	if [ $? -ne 0 ]; then
-		echo "SKIP fdb get tests: cannot add net namespace $testns"
+		end_test "SKIP fdb get tests: cannot add net namespace $testns"
 		return $ksft_skip
 	fi
-
-	$IP link add "$vxlandev" type vxlan id 10 local $localip \
-                dstport 4789 2>/dev/null
-	check_err $?
-	$IP link add name "$brdev" type bridge &>/dev/null
-	check_err $?
-	$IP link set dev "$vxlandev" master "$brdev" &>/dev/null
-	check_err $?
-	$BRIDGE fdb add $test_mac dev "$vxlandev" master &>/dev/null
-	check_err $?
-	$BRIDGE fdb add $test_mac dev "$vxlandev" dst $dstip self &>/dev/null
-	check_err $?
-
-	$BRIDGE fdb get $test_mac brport "$vxlandev" 2>/dev/null | grep -q "dev $vxlandev master $brdev"
-	check_err $?
-	$BRIDGE fdb get $test_mac br "$brdev" 2>/dev/null | grep -q "dev $vxlandev master $brdev"
-	check_err $?
-	$BRIDGE fdb get $test_mac dev "$vxlandev" self 2>/dev/null | grep -q "dev $vxlandev dst $dstip"
-	check_err $?
-
-	ip netns del testns &>/dev/null
+	IP="ip -netns $testns"
+	BRIDGE="bridge -netns $testns"
+	run_cmd $IP link add "$vxlandev" type vxlan id 10 local $localip \
+                dstport 4789
+	run_cmd $IP link add name "$brdev" type bridge
+	run_cmd $IP link set dev "$vxlandev" master "$brdev"
+	run_cmd $BRIDGE fdb add $test_mac dev "$vxlandev" master
+	run_cmd $BRIDGE fdb add $test_mac dev "$vxlandev" dst $dstip self
+	run_cmd_grep "dev $vxlandev master $brdev" $BRIDGE fdb get $test_mac brport "$vxlandev"
+	run_cmd_grep "dev $vxlandev master $brdev" $BRIDGE fdb get $test_mac br "$brdev"
+	run_cmd_grep "dev $vxlandev dst $dstip" $BRIDGE fdb get $test_mac dev "$vxlandev" self
+
+	ip netns del $testns &>/dev/null
 
 	if [ $ret -ne 0 ]; then
-		echo "FAIL: bridge fdb get"
+		end_test "FAIL: bridge fdb get"
 		return 1
 	fi
 
-	echo "PASS: bridge fdb get"
+	end_test "PASS: bridge fdb get"
 }
 
 kci_test_neigh_get()
@@ -1134,50 +1072,38 @@ kci_test_neigh_get()
 	dstip6=dead::2
 	local ret=0
 
-	ip neigh help 2>&1 |grep -q 'ip neigh get'
+	run_cmd_grep 'ip neigh get' ip neigh help
 	if [ $? -ne 0 ];then
-		echo "SKIP: fdb get tests: iproute2 too old"
+		end_test "SKIP: fdb get tests: iproute2 too old"
 		return $ksft_skip
 	fi
 
 	# ipv4
-	ip neigh add $dstip lladdr $dstmac dev "$devdummy"  > /dev/null
-	check_err $?
-	ip neigh get $dstip dev "$devdummy" 2> /dev/null | grep -q "$dstmac"
-	check_err $?
-	ip neigh del $dstip lladdr $dstmac dev "$devdummy"  > /dev/null
-	check_err $?
+	run_cmd ip neigh add $dstip lladdr $dstmac dev "$devdummy"
+	run_cmd_grep "$dstmac" ip neigh get $dstip dev "$devdummy"
+	run_cmd ip neigh del $dstip lladdr $dstmac dev "$devdummy"
 
 	# ipv4 proxy
-	ip neigh add proxy $dstip dev "$devdummy" > /dev/null
-	check_err $?
-	ip neigh get proxy $dstip dev "$devdummy" 2>/dev/null | grep -q "$dstip"
-	check_err $?
-	ip neigh del proxy $dstip dev "$devdummy" > /dev/null
-	check_err $?
+	run_cmd ip neigh add proxy $dstip dev "$devdummy"
+	run_cmd_grep "$dstip" ip neigh get proxy $dstip dev "$devdummy"
+	run_cmd ip neigh del proxy $dstip dev "$devdummy"
 
 	# ipv6
-	ip neigh add $dstip6 lladdr $dstmac dev "$devdummy"  > /dev/null
-	check_err $?
-	ip neigh get $dstip6 dev "$devdummy" 2> /dev/null | grep -q "$dstmac"
-	check_err $?
-	ip neigh del $dstip6 lladdr $dstmac dev "$devdummy"  > /dev/null
-	check_err $?
+	run_cmd ip neigh add $dstip6 lladdr $dstmac dev "$devdummy"
+	run_cmd_grep "$dstmac" ip neigh get $dstip6 dev "$devdummy"
+	run_cmd ip neigh del $dstip6 lladdr $dstmac dev "$devdummy"
 
 	# ipv6 proxy
-	ip neigh add proxy $dstip6 dev "$devdummy" > /dev/null
-	check_err $?
-	ip neigh get proxy $dstip6 dev "$devdummy" 2>/dev/null | grep -q "$dstip6"
-	check_err $?
-	ip neigh del proxy $dstip6 dev "$devdummy" > /dev/null
-	check_err $?
+	run_cmd ip neigh add proxy $dstip6 dev "$devdummy"
+	run_cmd_grep "$dstip6" ip neigh get proxy $dstip6 dev "$devdummy"
+	run_cmd ip neigh del proxy $dstip6 dev "$devdummy"
 
 	if [ $ret -ne 0 ];then
-		echo "FAIL: neigh get"
+		end_test "FAIL: neigh get"
 		return 1
 	fi
 
-	echo "PASS: neigh get"
+	end_test "PASS: neigh get"
 }
 
 kci_test_bridge_parent_id()
@@ -1187,10 +1113,9 @@ kci_test_bridge_parent_id()
 	probed=false
 
 	if [ ! -w /sys/bus/netdevsim/new_device ] ; then
-		modprobe -q netdevsim
-		check_err $?
+		run_cmd modprobe -q netdevsim
 		if [ $ret -ne 0 ]; then
-			echo "SKIP: bridge_parent_id can't load netdevsim"
+			end_test "SKIP: bridge_parent_id can't load netdevsim"
 			return $ksft_skip
 		fi
 		probed=true
@@ -1203,13 +1128,11 @@ kci_test_bridge_parent_id()
 	udevadm settle
 	dev10=`ls ${sysfsnet}10/net/`
 	dev20=`ls ${sysfsnet}20/net/`
-
-	ip link add name test-bond0 type bond mode 802.3ad
-	ip link set dev $dev10 master test-bond0
-	ip link set dev $dev20 master test-bond0
-	ip link add name test-br0 type bridge
-	ip link set dev test-bond0 master test-br0
-	check_err $?
+	run_cmd ip link add name test-bond0 type bond mode 802.3ad
+	run_cmd ip link set dev $dev10 master test-bond0
+	run_cmd ip link set dev $dev20 master test-bond0
+	run_cmd ip link add name test-br0 type bridge
+	run_cmd ip link set dev test-bond0 master test-br0
 
 	# clean up any leftovers
 	ip link del dev test-br0
@@ -1219,82 +1142,191 @@ kci_test_bridge_parent_id()
 	$probed && rmmod netdevsim
 
 	if [ $ret -ne 0 ]; then
-		echo "FAIL: bridge_parent_id"
+		end_test "FAIL: bridge_parent_id"
 		return 1
 	fi
-	echo "PASS: bridge_parent_id"
+	end_test "PASS: bridge_parent_id"
 }
 
-kci_test_rtnl()
+address_get_proto()
 {
+	local addr=$1; shift
+
+	ip -N -j address show dev "$devdummy" |
+	    jq -e -r --arg addr "${addr%/*}" \
+	       '.[].addr_info[] | select(.local == $addr) | .protocol'
+}
+
+address_count()
+{
+	ip -N -j address show dev "$devdummy" "$@" |
+	    jq -e -r '[.[].addr_info[] | .local | select(. != null)] | length'
+}
+
+do_test_address_proto()
+{
+	local what=$1; shift
+	local addr=$1; shift
+	local addr2=${addr%/*}2/${addr#*/}
+	local addr3=${addr%/*}3/${addr#*/}
+	local proto
+	local count
 	local ret=0
-	kci_add_dummy
-	if [ $ret -ne 0 ];then
-		echo "FAIL: cannot add dummy interface"
-		return 1
-	fi
+	local err
 
-	kci_test_polrouting
-	check_err $?
-	kci_test_route_get
+	ip address add dev "$devdummy" "$addr3"
 	check_err $?
-	kci_test_addrlft
+	proto=$(address_get_proto "$addr3")
+	[[ "$proto" == null ]]
 	check_err $?
-	kci_test_promote_secondaries
-	check_err $?
-	kci_test_tc
-	check_err $?
-	kci_test_gre
+
+	ip address add dev "$devdummy" "$addr2" proto 0x99
 	check_err $?
-	kci_test_gretap
+	proto=$(address_get_proto "$addr2")
+	[[ "$proto" == 0x99 ]]
 	check_err $?
-	kci_test_ip6gretap
+
+	ip address add dev "$devdummy" "$addr" proto 0xab
 	check_err $?
-	kci_test_erspan
+	proto=$(address_get_proto "$addr")
+	[[ "$proto" == 0xab ]]
 	check_err $?
-	kci_test_ip6erspan
+
+	ip address replace dev "$devdummy" "$addr" proto 0x11
+	proto=$(address_get_proto "$addr")
 	check_err $?
-	kci_test_bridge
+	[[ "$proto" == 0x11 ]]
 	check_err $?
-	kci_test_addrlabel
+
+	count=$(address_count)
 	check_err $?
-	kci_test_ifalias
+	(( count >= 3 )) # $addr, $addr2 and $addr3 plus any kernel addresses
 	check_err $?
-	kci_test_vrf
+
+	count=$(address_count proto 0)
 	check_err $?
-	kci_test_encap
+	(( count == 1 )) # just $addr3
 	check_err $?
-	kci_test_macsec
+
+	count=$(address_count proto 0x11)
 	check_err $?
-	kci_test_ipsec
+	(( count == 2 )) # $addr and $addr3
 	check_err $?
-	kci_test_ipsec_offload
+
+	count=$(address_count proto 0xab)
 	check_err $?
-	kci_test_fdb_get
+	(( count == 1 )) # just $addr3
 	check_err $?
-	kci_test_neigh_get
+
+	ip address del dev "$devdummy" "$addr"
+	ip address del dev "$devdummy" "$addr2"
+	ip address del dev "$devdummy" "$addr3"
+
+	if [ $ret -ne 0 ]; then
+		end_test "FAIL: address proto $what"
+		return 1
+	fi
+	end_test "PASS: address proto $what"
+}
+
+kci_test_address_proto()
+{
+	local ret=0
+
+	do_test_address_proto IPv4 192.0.2.1/28
 	check_err $?
-	kci_test_bridge_parent_id
+
+	do_test_address_proto IPv6 2001:db8:1::1/64
 	check_err $?
 
+	return $ret
+}
+
+kci_test_enslave_bonding()
+{
+	local bond="bond123"
+	local ret=0
+
+	setup_ns testns
+	if [ $? -ne 0 ]; then
+		end_test "SKIP bonding tests: cannot add net namespace $testns"
+		return $ksft_skip
+	fi
+
+	run_cmd ip -netns $testns link add dev $bond type bond mode balance-rr
+	run_cmd ip -netns $testns link add dev $devdummy type dummy
+	run_cmd ip -netns $testns link set dev $devdummy up
+	run_cmd ip -netns $testns link set dev $devdummy master $bond down
+	if [ $ret -ne 0 ]; then
+		end_test "FAIL: initially up interface added to a bond and set down"
+		ip netns del "$testns"
+		return 1
+	fi
+
+	end_test "PASS: enslave interface in a bond"
+	ip netns del "$testns"
+}
+
+kci_test_rtnl()
+{
+	local current_test
+	local ret=0
+
+	kci_add_dummy
+	if [ $ret -ne 0 ];then
+		end_test "FAIL: cannot add dummy interface"
+		return 1
+	fi
+
+	for current_test in ${TESTS:-$ALL_TESTS}; do
+		$current_test
+		check_err $?
+	done
+
 	kci_del_dummy
 	return $ret
 }
 
+usage()
+{
+	cat <<EOF
+usage: ${0##*/} OPTS
+
+        -t <test>   Test(s) to run (default: all)
+                    (options: $(echo $ALL_TESTS))
+        -v          Verbose mode (show commands and output)
+        -P          Pause after every test
+        -p          Pause after every failing test before cleanup (for debugging)
+EOF
+}
+
 #check for needed privileges
 if [ "$(id -u)" -ne 0 ];then
-	echo "SKIP: Need root privileges"
+	end_test "SKIP: Need root privileges"
 	exit $ksft_skip
 fi
 
 for x in ip tc;do
 	$x -Version 2>/dev/null >/dev/null
 	if [ $? -ne 0 ];then
-		echo "SKIP: Could not run test without the $x tool"
+		end_test "SKIP: Could not run test without the $x tool"
 		exit $ksft_skip
 	fi
 done
 
+while getopts t:hvpP o; do
+	case $o in
+		t) TESTS=$OPTARG;;
+		v) VERBOSE=1;;
+		p) PAUSE_ON_FAIL=yes;;
+		P) PAUSE=yes;;
+		h) usage; exit 0;;
+		*) usage; exit 1;;
+	esac
+done
+
+[ $PAUSE = "yes" ] && PAUSE_ON_FAIL="no"
+
 kci_test_rtnl
 
 exit $?
diff --git a/tools/testing/selftests/net/run_afpackettests b/tools/testing/selftests/net/run_afpackettests
index 8b42e8b04e0f..a59cb6a3c4f5 100755
--- a/tools/testing/selftests/net/run_afpackettests
+++ b/tools/testing/selftests/net/run_afpackettests
@@ -1,9 +1,12 @@
 #!/bin/sh
 # SPDX-License-Identifier: GPL-2.0
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 if [ $(id -u) != 0 ]; then
 	echo $msg must be run as root >&2
-	exit 0
+	exit $ksft_skip
 fi
 
 ret=0
diff --git a/tools/testing/selftests/net/rxtimestamp.c b/tools/testing/selftests/net/rxtimestamp.c
index e4613ce4ed69..9eb42570294d 100644
--- a/tools/testing/selftests/net/rxtimestamp.c
+++ b/tools/testing/selftests/net/rxtimestamp.c
@@ -18,7 +18,7 @@
 #include <linux/net_tstamp.h>
 #include <linux/errqueue.h>
 
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#include "../kselftest.h"
 
 struct options {
 	int so_timestamp;
diff --git a/tools/testing/selftests/net/sctp_hello.c b/tools/testing/selftests/net/sctp_hello.c
new file mode 100644
index 000000000000..f02f1f95d227
--- /dev/null
+++ b/tools/testing/selftests/net/sctp_hello.c
@@ -0,0 +1,137 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+
+static void set_addr(struct sockaddr_storage *ss, char *ip, char *port, int *len)
+{
+	if (ss->ss_family == AF_INET) {
+		struct sockaddr_in *a = (struct sockaddr_in *)ss;
+
+		a->sin_addr.s_addr = inet_addr(ip);
+		a->sin_port = htons(atoi(port));
+		*len = sizeof(*a);
+	} else {
+		struct sockaddr_in6 *a = (struct sockaddr_in6 *)ss;
+
+		a->sin6_family = AF_INET6;
+		inet_pton(AF_INET6, ip, &a->sin6_addr);
+		a->sin6_port = htons(atoi(port));
+		*len = sizeof(*a);
+	}
+}
+
+static int do_client(int argc, char *argv[])
+{
+	struct sockaddr_storage ss;
+	char buf[] = "hello";
+	int csk, ret, len;
+
+	if (argc < 5) {
+		printf("%s client -4|6 IP PORT [IP PORT]\n", argv[0]);
+		return -1;
+	}
+
+	bzero((void *)&ss, sizeof(ss));
+	ss.ss_family = !strcmp(argv[2], "-4") ? AF_INET : AF_INET6;
+	csk = socket(ss.ss_family, SOCK_STREAM, IPPROTO_SCTP);
+	if (csk < 0) {
+		printf("failed to create socket\n");
+		return -1;
+	}
+
+	if (argc >= 7) {
+		set_addr(&ss, argv[5], argv[6], &len);
+		ret = bind(csk, (struct sockaddr *)&ss, len);
+		if (ret < 0) {
+			printf("failed to bind to address\n");
+			return -1;
+		}
+	}
+
+	set_addr(&ss, argv[3], argv[4], &len);
+	ret = connect(csk, (struct sockaddr *)&ss, len);
+	if (ret < 0) {
+		printf("failed to connect to peer\n");
+		return -1;
+	}
+
+	ret = send(csk, buf, strlen(buf) + 1, 0);
+	if (ret < 0) {
+		printf("failed to send msg %d\n", ret);
+		return -1;
+	}
+	close(csk);
+
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	struct sockaddr_storage ss;
+	int lsk, csk, ret, len;
+	char buf[20];
+
+	if (argc < 2 || (strcmp(argv[1], "server") && strcmp(argv[1], "client"))) {
+		printf("%s server|client ...\n", argv[0]);
+		return -1;
+	}
+
+	if (!strcmp(argv[1], "client"))
+		return do_client(argc, argv);
+
+	if (argc < 5) {
+		printf("%s server -4|6 IP PORT [IFACE]\n", argv[0]);
+		return -1;
+	}
+
+	ss.ss_family = !strcmp(argv[2], "-4") ? AF_INET : AF_INET6;
+	lsk = socket(ss.ss_family, SOCK_STREAM, IPPROTO_SCTP);
+	if (lsk < 0) {
+		printf("failed to create lsk\n");
+		return -1;
+	}
+
+	if (argc >= 6) {
+		ret = setsockopt(lsk, SOL_SOCKET, SO_BINDTODEVICE,
+				 argv[5], strlen(argv[5]) + 1);
+		if (ret < 0) {
+			printf("failed to bind to device\n");
+			return -1;
+		}
+	}
+
+	set_addr(&ss, argv[3], argv[4], &len);
+	ret = bind(lsk, (struct sockaddr *)&ss, len);
+	if (ret < 0) {
+		printf("failed to bind to address\n");
+		return -1;
+	}
+
+	ret = listen(lsk, 5);
+	if (ret < 0) {
+		printf("failed to listen on port\n");
+		return -1;
+	}
+
+	csk = accept(lsk, (struct sockaddr *)NULL, (socklen_t *)NULL);
+	if (csk < 0) {
+		printf("failed to accept new client\n");
+		return -1;
+	}
+
+	ret = recv(csk, buf, sizeof(buf), 0);
+	if (ret <= 0) {
+		printf("failed to recv msg %d\n", ret);
+		return -1;
+	}
+	close(csk);
+	close(lsk);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/net/sctp_vrf.sh b/tools/testing/selftests/net/sctp_vrf.sh
new file mode 100755
index 000000000000..c854034b6aa1
--- /dev/null
+++ b/tools/testing/selftests/net/sctp_vrf.sh
@@ -0,0 +1,172 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Testing For SCTP VRF.
+# TOPO: CLIENT_NS1 (veth1) <---> (veth1) -> vrf_s1
+#                                                  SERVER_NS
+#       CLIENT_NS2 (veth1) <---> (veth2) -> vrf_s2
+
+source lib.sh
+CLIENT_IP4="10.0.0.1"
+CLIENT_IP6="2000::1"
+CLIENT_PORT=1234
+
+SERVER_IP4="10.0.0.2"
+SERVER_IP6="2000::2"
+SERVER_PORT=1234
+
+setup() {
+	modprobe sctp
+	modprobe sctp_diag
+	setup_ns CLIENT_NS1 CLIENT_NS2 SERVER_NS
+
+	ip net exec $CLIENT_NS1 sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null
+	ip net exec $CLIENT_NS2 sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null
+	ip net exec $SERVER_NS sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null
+
+	ip -n $SERVER_NS link add veth1 type veth peer name veth1 netns $CLIENT_NS1
+	ip -n $SERVER_NS link add veth2 type veth peer name veth1 netns $CLIENT_NS2
+
+	ip -n $CLIENT_NS1 link set veth1 up
+	ip -n $CLIENT_NS1 addr add $CLIENT_IP4/24 dev veth1
+	ip -n $CLIENT_NS1 addr add $CLIENT_IP6/24 dev veth1
+
+	ip -n $CLIENT_NS2 link set veth1 up
+	ip -n $CLIENT_NS2 addr add $CLIENT_IP4/24 dev veth1
+	ip -n $CLIENT_NS2 addr add $CLIENT_IP6/24 dev veth1
+
+	ip -n $SERVER_NS link add dummy1 type dummy
+	ip -n $SERVER_NS link set dummy1 up
+	ip -n $SERVER_NS link add vrf-1 type vrf table 10
+	ip -n $SERVER_NS link add vrf-2 type vrf table 20
+	ip -n $SERVER_NS link set vrf-1 up
+	ip -n $SERVER_NS link set vrf-2 up
+	ip -n $SERVER_NS link set veth1 master vrf-1
+	ip -n $SERVER_NS link set veth2 master vrf-2
+
+	ip -n $SERVER_NS addr add $SERVER_IP4/24 dev dummy1
+	ip -n $SERVER_NS addr add $SERVER_IP4/24 dev veth1
+	ip -n $SERVER_NS addr add $SERVER_IP4/24 dev veth2
+	ip -n $SERVER_NS addr add $SERVER_IP6/24 dev dummy1
+	ip -n $SERVER_NS addr add $SERVER_IP6/24 dev veth1
+	ip -n $SERVER_NS addr add $SERVER_IP6/24 dev veth2
+
+	ip -n $SERVER_NS link set veth1 up
+	ip -n $SERVER_NS link set veth2 up
+	ip -n $SERVER_NS route add table 10 $CLIENT_IP4 dev veth1 src $SERVER_IP4
+	ip -n $SERVER_NS route add table 20 $CLIENT_IP4 dev veth2 src $SERVER_IP4
+	ip -n $SERVER_NS route add $CLIENT_IP4 dev veth1 src $SERVER_IP4
+	ip -n $SERVER_NS route add table 10 $CLIENT_IP6 dev veth1 src $SERVER_IP6
+	ip -n $SERVER_NS route add table 20 $CLIENT_IP6 dev veth2 src $SERVER_IP6
+	ip -n $SERVER_NS route add $CLIENT_IP6 dev veth1 src $SERVER_IP6
+}
+
+cleanup() {
+	ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null
+	cleanup_ns $CLIENT_NS1 $CLIENT_NS2 $SERVER_NS
+}
+
+wait_server() {
+	local IFACE=$1
+	local CNT=0
+
+	until ip netns exec $SERVER_NS ss -lS src $SERVER_IP:$SERVER_PORT | \
+		grep LISTEN | grep "$IFACE" 2>&1 >/dev/null; do
+		[ $((CNT++)) = "20" ] && { RET=3; return $RET; }
+		sleep 0.1
+	done
+}
+
+do_test() {
+	local CLIENT_NS=$1
+	local IFACE=$2
+
+	ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null
+	ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \
+		$SERVER_PORT $IFACE 2>&1 >/dev/null &
+	disown
+	wait_server $IFACE || return $RET
+	timeout 3 ip netns exec $CLIENT_NS ./sctp_hello client $AF \
+		$SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null
+	RET=$?
+	return $RET
+}
+
+do_testx() {
+	local IFACE1=$1
+	local IFACE2=$2
+
+	ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null
+	ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \
+		$SERVER_PORT $IFACE1 2>&1 >/dev/null &
+	disown
+	wait_server $IFACE1 || return $RET
+	ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \
+		$SERVER_PORT $IFACE2 2>&1 >/dev/null &
+	disown
+	wait_server $IFACE2 || return $RET
+	timeout 3 ip netns exec $CLIENT_NS1 ./sctp_hello client $AF \
+		$SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null && \
+	timeout 3 ip netns exec $CLIENT_NS2 ./sctp_hello client $AF \
+		$SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null
+	RET=$?
+	return $RET
+}
+
+testup() {
+	ip netns exec $SERVER_NS sysctl -w net.sctp.l3mdev_accept=1 2>&1 >/dev/null
+	echo -n "TEST 01: nobind, connect from client 1, l3mdev_accept=1, Y "
+	do_test $CLIENT_NS1 || { echo "[FAIL]"; return $RET; }
+	echo "[PASS]"
+
+	echo -n "TEST 02: nobind, connect from client 2, l3mdev_accept=1, N "
+	do_test $CLIENT_NS2 && { echo "[FAIL]"; return $RET; }
+	echo "[PASS]"
+
+	ip netns exec $SERVER_NS sysctl -w net.sctp.l3mdev_accept=0 2>&1 >/dev/null
+	echo -n "TEST 03: nobind, connect from client 1, l3mdev_accept=0, N "
+	do_test $CLIENT_NS1 && { echo "[FAIL]"; return $RET; }
+	echo "[PASS]"
+
+	echo -n "TEST 04: nobind, connect from client 2, l3mdev_accept=0, N "
+	do_test $CLIENT_NS2 && { echo "[FAIL]"; return $RET; }
+	echo "[PASS]"
+
+	echo -n "TEST 05: bind veth2 in server, connect from client 1, N "
+	do_test $CLIENT_NS1 veth2 && { echo "[FAIL]"; return $RET; }
+	echo "[PASS]"
+
+	echo -n "TEST 06: bind veth1 in server, connect from client 1, Y "
+	do_test $CLIENT_NS1 veth1 || { echo "[FAIL]"; return $RET; }
+	echo "[PASS]"
+
+	echo -n "TEST 07: bind vrf-1 in server, connect from client 1, Y "
+	do_test $CLIENT_NS1 vrf-1 || { echo "[FAIL]"; return $RET; }
+	echo "[PASS]"
+
+	echo -n "TEST 08: bind vrf-2 in server, connect from client 1, N "
+	do_test $CLIENT_NS1 vrf-2 && { echo "[FAIL]"; return $RET; }
+	echo "[PASS]"
+
+	echo -n "TEST 09: bind vrf-2 in server, connect from client 2, Y "
+	do_test $CLIENT_NS2 vrf-2 || { echo "[FAIL]"; return $RET; }
+	echo "[PASS]"
+
+	echo -n "TEST 10: bind vrf-1 in server, connect from client 2, N "
+	do_test $CLIENT_NS2 vrf-1 && { echo "[FAIL]"; return $RET; }
+	echo "[PASS]"
+
+	echo -n "TEST 11: bind vrf-1 & 2 in server, connect from client 1 & 2, Y "
+	do_testx vrf-1 vrf-2 || { echo "[FAIL]"; return $RET; }
+	echo "[PASS]"
+
+	echo -n "TEST 12: bind vrf-2 & 1 in server, connect from client 1 & 2, N "
+	do_testx vrf-2 vrf-1 || { echo "[FAIL]"; return $RET; }
+	echo "[PASS]"
+}
+
+trap cleanup EXIT
+setup && echo "Testing For SCTP VRF:" && \
+CLIENT_IP=$CLIENT_IP4 SERVER_IP=$SERVER_IP4 AF="-4" testup && echo "***v4 Tests Done***" &&
+CLIENT_IP=$CLIENT_IP6 SERVER_IP=$SERVER_IP6 AF="-6" testup && echo "***v6 Tests Done***"
+exit $?
diff --git a/tools/testing/selftests/net/settings b/tools/testing/selftests/net/settings
new file mode 100644
index 000000000000..ed8418e8217a
--- /dev/null
+++ b/tools/testing/selftests/net/settings
@@ -0,0 +1 @@
+timeout=3600
diff --git a/tools/testing/selftests/net/setup_loopback.sh b/tools/testing/selftests/net/setup_loopback.sh
new file mode 100644
index 000000000000..2070b57849de
--- /dev/null
+++ b/tools/testing/selftests/net/setup_loopback.sh
@@ -0,0 +1,120 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+readonly FLUSH_PATH="/sys/class/net/${dev}/gro_flush_timeout"
+readonly IRQ_PATH="/sys/class/net/${dev}/napi_defer_hard_irqs"
+readonly FLUSH_TIMEOUT="$(< ${FLUSH_PATH})"
+readonly HARD_IRQS="$(< ${IRQ_PATH})"
+readonly server_ns=$(mktemp -u server-XXXXXXXX)
+readonly client_ns=$(mktemp -u client-XXXXXXXX)
+
+netdev_check_for_carrier() {
+	local -r dev="$1"
+
+	for i in {1..5}; do
+		carrier="$(cat /sys/class/net/${dev}/carrier)"
+		if [[ "${carrier}" -ne 1 ]] ; then
+			echo "carrier not ready yet..." >&2
+			sleep 1
+		else
+			echo "carrier ready" >&2
+			break
+		fi
+	done
+	echo "${carrier}"
+}
+
+# Assumes that there is no existing ipvlan device on the physical device
+setup_loopback_environment() {
+	local dev="$1"
+
+	# Fail hard if cannot turn on loopback mode for current NIC
+	ethtool -K "${dev}" loopback on || exit 1
+	sleep 1
+
+	# Check for the carrier
+	carrier=$(netdev_check_for_carrier ${dev})
+	if [[ "${carrier}" -ne 1 ]] ; then
+		echo "setup_loopback_environment failed"
+		exit 1
+	fi
+}
+
+setup_macvlan_ns(){
+	local -r link_dev="$1"
+	local -r ns_name="$2"
+	local -r ns_dev="$3"
+	local -r ns_mac="$4"
+	local -r addr="$5"
+
+	ip link add link "${link_dev}" dev "${ns_dev}" \
+		address "${ns_mac}" type macvlan
+	exit_code=$?
+	if [[ "${exit_code}" -ne 0 ]]; then
+		echo "setup_macvlan_ns failed"
+		exit $exit_code
+	fi
+
+	[[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}"
+	ip link set dev "${ns_dev}" netns "${ns_name}"
+	ip -netns "${ns_name}" link set dev "${ns_dev}" up
+	if [[ -n "${addr}" ]]; then
+		ip -netns "${ns_name}" addr add dev "${ns_dev}" "${addr}"
+	fi
+
+	sleep 1
+}
+
+cleanup_macvlan_ns(){
+	while (( $# >= 2 )); do
+		ns_name="$1"
+		ns_dev="$2"
+		ip -netns "${ns_name}" link del dev "${ns_dev}"
+		ip netns del "${ns_name}"
+		shift 2
+	done
+}
+
+cleanup_loopback(){
+	local -r dev="$1"
+
+	ethtool -K "${dev}" loopback off
+	sleep 1
+
+	# Check for the carrier
+	carrier=$(netdev_check_for_carrier ${dev})
+	if [[ "${carrier}" -ne 1 ]] ; then
+		echo "setup_loopback_environment failed"
+		exit 1
+	fi
+}
+
+setup_interrupt() {
+	# Use timer on  host to trigger the network stack
+	# Also disable device interrupt to not depend on NIC interrupt
+	# Reduce test flakiness caused by unexpected interrupts
+	echo 100000 >"${FLUSH_PATH}"
+	echo 50 >"${IRQ_PATH}"
+}
+
+setup_ns() {
+	# Set up server_ns namespace and client_ns namespace
+	setup_macvlan_ns "${dev}" ${server_ns} server "${SERVER_MAC}"
+	setup_macvlan_ns "${dev}" ${client_ns} client "${CLIENT_MAC}"
+}
+
+cleanup_ns() {
+	cleanup_macvlan_ns ${server_ns} server ${client_ns} client
+}
+
+setup() {
+	setup_loopback_environment "${dev}"
+	setup_interrupt
+}
+
+cleanup() {
+	cleanup_loopback "${dev}"
+
+	echo "${FLUSH_TIMEOUT}" >"${FLUSH_PATH}"
+	echo "${HARD_IRQS}" >"${IRQ_PATH}"
+}
diff --git a/tools/testing/selftests/net/setup_veth.sh b/tools/testing/selftests/net/setup_veth.sh
new file mode 100644
index 000000000000..1f78a87f6f37
--- /dev/null
+++ b/tools/testing/selftests/net/setup_veth.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+readonly server_ns=$(mktemp -u server-XXXXXXXX)
+readonly client_ns=$(mktemp -u client-XXXXXXXX)
+
+setup_veth_ns() {
+	local -r link_dev="$1"
+	local -r ns_name="$2"
+	local -r ns_dev="$3"
+	local -r ns_mac="$4"
+
+	[[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}"
+	echo 1000000 > "/sys/class/net/${ns_dev}/gro_flush_timeout"
+	ip link set dev "${ns_dev}" netns "${ns_name}" mtu 65535
+	ip -netns "${ns_name}" link set dev "${ns_dev}" up
+
+	ip netns exec "${ns_name}" ethtool -K "${ns_dev}" gro on tso off
+}
+
+setup_ns() {
+	# Set up server_ns namespace and client_ns namespace
+	ip link add name server type veth peer name client
+
+	setup_veth_ns "${dev}" ${server_ns} server "${SERVER_MAC}"
+	setup_veth_ns "${dev}" ${client_ns} client "${CLIENT_MAC}"
+}
+
+cleanup_ns() {
+	local ns_name
+
+	for ns_name in ${client_ns} ${server_ns}; do
+		[[ -e /var/run/netns/"${ns_name}" ]] && ip netns del "${ns_name}"
+	done
+}
+
+setup() {
+	# no global init setup step needed
+	:
+}
+
+cleanup() {
+	cleanup_ns
+}
diff --git a/tools/testing/selftests/net/sk_bind_sendto_listen.c b/tools/testing/selftests/net/sk_bind_sendto_listen.c
new file mode 100644
index 000000000000..b420d830f72c
--- /dev/null
+++ b/tools/testing/selftests/net/sk_bind_sendto_listen.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <arpa/inet.h>
+#include <error.h>
+#include <errno.h>
+#include <unistd.h>
+
+int main(void)
+{
+	int fd1, fd2, one = 1;
+	struct sockaddr_in6 bind_addr = {
+		.sin6_family = AF_INET6,
+		.sin6_port = htons(20000),
+		.sin6_flowinfo = htonl(0),
+		.sin6_addr = {},
+		.sin6_scope_id = 0,
+	};
+
+	inet_pton(AF_INET6, "::", &bind_addr.sin6_addr);
+
+	fd1 = socket(AF_INET6, SOCK_STREAM, IPPROTO_IP);
+	if (fd1 < 0) {
+		error(1, errno, "socket fd1");
+		return -1;
+	}
+
+	if (setsockopt(fd1, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one))) {
+		error(1, errno, "setsockopt(SO_REUSEADDR) fd1");
+		goto out_err1;
+	}
+
+	if (bind(fd1, (struct sockaddr *)&bind_addr, sizeof(bind_addr))) {
+		error(1, errno, "bind fd1");
+		goto out_err1;
+	}
+
+	if (sendto(fd1, NULL, 0, MSG_FASTOPEN, (struct sockaddr *)&bind_addr,
+		   sizeof(bind_addr))) {
+		error(1, errno, "sendto fd1");
+		goto out_err1;
+	}
+
+	fd2 = socket(AF_INET6, SOCK_STREAM, IPPROTO_IP);
+	if (fd2 < 0) {
+		error(1, errno, "socket fd2");
+		goto out_err1;
+	}
+
+	if (setsockopt(fd2, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one))) {
+		error(1, errno, "setsockopt(SO_REUSEADDR) fd2");
+		goto out_err2;
+	}
+
+	if (bind(fd2, (struct sockaddr *)&bind_addr, sizeof(bind_addr))) {
+		error(1, errno, "bind fd2");
+		goto out_err2;
+	}
+
+	if (sendto(fd2, NULL, 0, MSG_FASTOPEN, (struct sockaddr *)&bind_addr,
+		   sizeof(bind_addr)) != -1) {
+		error(1, errno, "sendto fd2");
+		goto out_err2;
+	}
+
+	if (listen(fd2, 0)) {
+		error(1, errno, "listen");
+		goto out_err2;
+	}
+
+	close(fd2);
+	close(fd1);
+	return 0;
+
+out_err2:
+	close(fd2);
+
+out_err1:
+	close(fd1);
+	return -1;
+}
diff --git a/tools/testing/selftests/net/sk_connect_zero_addr.c b/tools/testing/selftests/net/sk_connect_zero_addr.c
new file mode 100644
index 000000000000..4be418aefd9f
--- /dev/null
+++ b/tools/testing/selftests/net/sk_connect_zero_addr.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <arpa/inet.h>
+#include <error.h>
+#include <errno.h>
+#include <unistd.h>
+
+int main(void)
+{
+	int fd1, fd2, one = 1;
+	struct sockaddr_in6 bind_addr = {
+		.sin6_family = AF_INET6,
+		.sin6_port = htons(20000),
+		.sin6_flowinfo = htonl(0),
+		.sin6_addr = {},
+		.sin6_scope_id = 0,
+	};
+
+	inet_pton(AF_INET6, "::", &bind_addr.sin6_addr);
+
+	fd1 = socket(AF_INET6, SOCK_STREAM, IPPROTO_IP);
+	if (fd1 < 0) {
+		error(1, errno, "socket fd1");
+		return -1;
+	}
+
+	if (setsockopt(fd1, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one))) {
+		error(1, errno, "setsockopt(SO_REUSEADDR) fd1");
+		goto out_err1;
+	}
+
+	if (bind(fd1, (struct sockaddr *)&bind_addr, sizeof(bind_addr))) {
+		error(1, errno, "bind fd1");
+		goto out_err1;
+	}
+
+	if (listen(fd1, 0)) {
+		error(1, errno, "listen");
+		goto out_err1;
+	}
+
+	fd2 = socket(AF_INET6, SOCK_STREAM, IPPROTO_IP);
+	if (fd2 < 0) {
+		error(1, errno, "socket fd2");
+		goto out_err1;
+	}
+
+	if (connect(fd2, (struct sockaddr *)&bind_addr, sizeof(bind_addr))) {
+		error(1, errno, "bind fd2");
+		goto out_err2;
+	}
+
+	close(fd2);
+	close(fd1);
+	return 0;
+
+out_err2:
+	close(fd2);
+out_err1:
+	close(fd1);
+	return -1;
+}
diff --git a/tools/testing/selftests/net/so_incoming_cpu.c b/tools/testing/selftests/net/so_incoming_cpu.c
new file mode 100644
index 000000000000..e9fa14e10732
--- /dev/null
+++ b/tools/testing/selftests/net/so_incoming_cpu.c
@@ -0,0 +1,274 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+#define _GNU_SOURCE
+#include <sched.h>
+
+#include <fcntl.h>
+
+#include <netinet/in.h>
+#include <sys/socket.h>
+#include <sys/sysinfo.h>
+
+#include "../kselftest_harness.h"
+
+FIXTURE(so_incoming_cpu)
+{
+	int *servers;
+	union {
+		struct sockaddr addr;
+		struct sockaddr_in in_addr;
+	};
+	socklen_t addrlen;
+};
+
+enum when_to_set {
+	BEFORE_REUSEPORT,
+	BEFORE_LISTEN,
+	AFTER_LISTEN,
+	AFTER_ALL_LISTEN,
+};
+
+FIXTURE_VARIANT(so_incoming_cpu)
+{
+	int when_to_set;
+};
+
+FIXTURE_VARIANT_ADD(so_incoming_cpu, before_reuseport)
+{
+	.when_to_set = BEFORE_REUSEPORT,
+};
+
+FIXTURE_VARIANT_ADD(so_incoming_cpu, before_listen)
+{
+	.when_to_set = BEFORE_LISTEN,
+};
+
+FIXTURE_VARIANT_ADD(so_incoming_cpu, after_listen)
+{
+	.when_to_set = AFTER_LISTEN,
+};
+
+FIXTURE_VARIANT_ADD(so_incoming_cpu, after_all_listen)
+{
+	.when_to_set = AFTER_ALL_LISTEN,
+};
+
+static void write_sysctl(struct __test_metadata *_metadata,
+			 char *filename, char *string)
+{
+	int fd, len, ret;
+
+	fd = open(filename, O_WRONLY);
+	ASSERT_NE(fd, -1);
+
+	len = strlen(string);
+	ret = write(fd, string, len);
+	ASSERT_EQ(ret, len);
+}
+
+static void setup_netns(struct __test_metadata *_metadata)
+{
+	ASSERT_EQ(unshare(CLONE_NEWNET), 0);
+	ASSERT_EQ(system("ip link set lo up"), 0);
+
+	write_sysctl(_metadata, "/proc/sys/net/ipv4/ip_local_port_range", "10000 60001");
+	write_sysctl(_metadata, "/proc/sys/net/ipv4/tcp_tw_reuse", "0");
+}
+
+#define NR_PORT				(60001 - 10000 - 1)
+#define NR_CLIENT_PER_SERVER_DEFAULT	32
+static int nr_client_per_server, nr_server, nr_client;
+
+FIXTURE_SETUP(so_incoming_cpu)
+{
+	setup_netns(_metadata);
+
+	nr_server = get_nprocs();
+	ASSERT_LE(2, nr_server);
+
+	if (NR_CLIENT_PER_SERVER_DEFAULT * nr_server < NR_PORT)
+		nr_client_per_server = NR_CLIENT_PER_SERVER_DEFAULT;
+	else
+		nr_client_per_server = NR_PORT / nr_server;
+
+	nr_client = nr_client_per_server * nr_server;
+
+	self->servers = malloc(sizeof(int) * nr_server);
+	ASSERT_NE(self->servers, NULL);
+
+	self->in_addr.sin_family = AF_INET;
+	self->in_addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+	self->in_addr.sin_port = htons(0);
+	self->addrlen = sizeof(struct sockaddr_in);
+}
+
+FIXTURE_TEARDOWN(so_incoming_cpu)
+{
+	int i;
+
+	for (i = 0; i < nr_server; i++)
+		close(self->servers[i]);
+
+	free(self->servers);
+}
+
+void set_so_incoming_cpu(struct __test_metadata *_metadata, int fd, int cpu)
+{
+	int ret;
+
+	ret = setsockopt(fd, SOL_SOCKET, SO_INCOMING_CPU, &cpu, sizeof(int));
+	ASSERT_EQ(ret, 0);
+}
+
+int create_server(struct __test_metadata *_metadata,
+		  FIXTURE_DATA(so_incoming_cpu) *self,
+		  const FIXTURE_VARIANT(so_incoming_cpu) *variant,
+		  int cpu)
+{
+	int fd, ret;
+
+	fd = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, 0);
+	ASSERT_NE(fd, -1);
+
+	if (variant->when_to_set == BEFORE_REUSEPORT)
+		set_so_incoming_cpu(_metadata, fd, cpu);
+
+	ret = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &(int){1}, sizeof(int));
+	ASSERT_EQ(ret, 0);
+
+	ret = bind(fd, &self->addr, self->addrlen);
+	ASSERT_EQ(ret, 0);
+
+	if (variant->when_to_set == BEFORE_LISTEN)
+		set_so_incoming_cpu(_metadata, fd, cpu);
+
+	/* We don't use nr_client_per_server here not to block
+	 * this test at connect() if SO_INCOMING_CPU is broken.
+	 */
+	ret = listen(fd, nr_client);
+	ASSERT_EQ(ret, 0);
+
+	if (variant->when_to_set == AFTER_LISTEN)
+		set_so_incoming_cpu(_metadata, fd, cpu);
+
+	return fd;
+}
+
+void create_servers(struct __test_metadata *_metadata,
+		    FIXTURE_DATA(so_incoming_cpu) *self,
+		    const FIXTURE_VARIANT(so_incoming_cpu) *variant)
+{
+	int i, ret;
+
+	for (i = 0; i < nr_server; i++) {
+		self->servers[i] = create_server(_metadata, self, variant, i);
+
+		if (i == 0) {
+			ret = getsockname(self->servers[i], &self->addr, &self->addrlen);
+			ASSERT_EQ(ret, 0);
+		}
+	}
+
+	if (variant->when_to_set == AFTER_ALL_LISTEN) {
+		for (i = 0; i < nr_server; i++)
+			set_so_incoming_cpu(_metadata, self->servers[i], i);
+	}
+}
+
+void create_clients(struct __test_metadata *_metadata,
+		    FIXTURE_DATA(so_incoming_cpu) *self)
+{
+	cpu_set_t cpu_set;
+	int i, j, fd, ret;
+
+	for (i = 0; i < nr_server; i++) {
+		CPU_ZERO(&cpu_set);
+
+		CPU_SET(i, &cpu_set);
+		ASSERT_EQ(CPU_COUNT(&cpu_set), 1);
+		ASSERT_NE(CPU_ISSET(i, &cpu_set), 0);
+
+		/* Make sure SYN will be processed on the i-th CPU
+		 * and finally distributed to the i-th listener.
+		 */
+		ret = sched_setaffinity(0, sizeof(cpu_set), &cpu_set);
+		ASSERT_EQ(ret, 0);
+
+		for (j = 0; j < nr_client_per_server; j++) {
+			fd  = socket(AF_INET, SOCK_STREAM, 0);
+			ASSERT_NE(fd, -1);
+
+			ret = connect(fd, &self->addr, self->addrlen);
+			ASSERT_EQ(ret, 0);
+
+			close(fd);
+		}
+	}
+}
+
+void verify_incoming_cpu(struct __test_metadata *_metadata,
+			 FIXTURE_DATA(so_incoming_cpu) *self)
+{
+	int i, j, fd, cpu, ret, total = 0;
+	socklen_t len = sizeof(int);
+
+	for (i = 0; i < nr_server; i++) {
+		for (j = 0; j < nr_client_per_server; j++) {
+			/* If we see -EAGAIN here, SO_INCOMING_CPU is broken */
+			fd = accept(self->servers[i], &self->addr, &self->addrlen);
+			ASSERT_NE(fd, -1);
+
+			ret = getsockopt(fd, SOL_SOCKET, SO_INCOMING_CPU, &cpu, &len);
+			ASSERT_EQ(ret, 0);
+			ASSERT_EQ(cpu, i);
+
+			close(fd);
+			total++;
+		}
+	}
+
+	ASSERT_EQ(total, nr_client);
+	TH_LOG("SO_INCOMING_CPU is very likely to be "
+	       "working correctly with %d sockets.", total);
+}
+
+TEST_F(so_incoming_cpu, test1)
+{
+	create_servers(_metadata, self, variant);
+	create_clients(_metadata, self);
+	verify_incoming_cpu(_metadata, self);
+}
+
+TEST_F(so_incoming_cpu, test2)
+{
+	int server;
+
+	create_servers(_metadata, self, variant);
+
+	/* No CPU specified */
+	server = create_server(_metadata, self, variant, -1);
+	close(server);
+
+	create_clients(_metadata, self);
+	verify_incoming_cpu(_metadata, self);
+}
+
+TEST_F(so_incoming_cpu, test3)
+{
+	int server, client;
+
+	create_servers(_metadata, self, variant);
+
+	/* No CPU specified */
+	server = create_server(_metadata, self, variant, -1);
+
+	create_clients(_metadata, self);
+
+	/* Never receive any requests */
+	client = accept(server, &self->addr, &self->addrlen);
+	ASSERT_EQ(client, -1);
+
+	verify_incoming_cpu(_metadata, self);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/so_netns_cookie.c b/tools/testing/selftests/net/so_netns_cookie.c
new file mode 100644
index 000000000000..b39e87e967cd
--- /dev/null
+++ b/tools/testing/selftests/net/so_netns_cookie.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sched.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#ifndef SO_NETNS_COOKIE
+#define SO_NETNS_COOKIE 71
+#endif
+
+#define pr_err(fmt, ...) \
+	({ \
+		fprintf(stderr, "%s:%d:" fmt ": %m\n", \
+			__func__, __LINE__, ##__VA_ARGS__); \
+		1; \
+	})
+
+int main(int argc, char *argvp[])
+{
+	uint64_t cookie1, cookie2;
+	socklen_t vallen;
+	int sock1, sock2;
+
+	sock1 = socket(AF_INET, SOCK_STREAM, 0);
+	if (sock1 < 0)
+		return pr_err("Unable to create TCP socket");
+
+	vallen = sizeof(cookie1);
+	if (getsockopt(sock1, SOL_SOCKET, SO_NETNS_COOKIE, &cookie1, &vallen) != 0)
+		return pr_err("getsockopt(SOL_SOCKET, SO_NETNS_COOKIE)");
+
+	if (!cookie1)
+		return pr_err("SO_NETNS_COOKIE returned zero cookie");
+
+	if (unshare(CLONE_NEWNET))
+		return pr_err("unshare");
+
+	sock2 = socket(AF_INET, SOCK_STREAM, 0);
+	if (sock2 < 0)
+		return pr_err("Unable to create TCP socket");
+
+	vallen = sizeof(cookie2);
+	if (getsockopt(sock2, SOL_SOCKET, SO_NETNS_COOKIE, &cookie2, &vallen) != 0)
+		return pr_err("getsockopt(SOL_SOCKET, SO_NETNS_COOKIE)");
+
+	if (!cookie2)
+		return pr_err("SO_NETNS_COOKIE returned zero cookie");
+
+	if (cookie1 == cookie2)
+		return pr_err("SO_NETNS_COOKIE returned identical cookies for distinct ns");
+
+	close(sock1);
+	close(sock2);
+	return 0;
+}
diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c
index 3155fbbf644b..8457b7ccbc09 100644
--- a/tools/testing/selftests/net/so_txtime.c
+++ b/tools/testing/selftests/net/so_txtime.c
@@ -2,9 +2,12 @@
 /*
  * Test the SO_TXTIME API
  *
- * Takes two streams of { payload, delivery time }[], one input and one output.
- * Sends the input stream and verifies arrival matches the output stream.
- * The two streams can differ due to out-of-order delivery and drops.
+ * Takes a stream of { payload, delivery time }[], to be sent across two
+ * processes. Start this program on two separate network namespaces or
+ * connected hosts, one instance in transmit mode and the other in receive
+ * mode using the '-r' option. Receiver will compare arrival timestamps to
+ * the expected stream. Sender will read transmit timestamps from the error
+ * queue. The streams can differ due to out-of-order delivery and drops.
  */
 
 #define _GNU_SOURCE
@@ -28,14 +31,17 @@
 #include <sys/types.h>
 #include <time.h>
 #include <unistd.h>
+#include <poll.h>
 
 static int	cfg_clockid	= CLOCK_TAI;
-static bool	cfg_do_ipv4;
-static bool	cfg_do_ipv6;
 static uint16_t	cfg_port	= 8000;
 static int	cfg_variance_us	= 4000;
+static uint64_t	cfg_start_time_ns;
+static int	cfg_mark;
+static bool	cfg_rx;
 
 static uint64_t glob_tstart;
+static uint64_t tdeliver_max;
 
 /* encode one timed transmission (of a 1B payload) */
 struct timed_send {
@@ -44,18 +50,21 @@ struct timed_send {
 };
 
 #define MAX_NUM_PKT	8
-static struct timed_send cfg_in[MAX_NUM_PKT];
-static struct timed_send cfg_out[MAX_NUM_PKT];
+static struct timed_send cfg_buf[MAX_NUM_PKT];
 static int cfg_num_pkt;
 
 static int cfg_errq_level;
 static int cfg_errq_type;
 
-static uint64_t gettime_ns(void)
+static struct sockaddr_storage cfg_dst_addr;
+static struct sockaddr_storage cfg_src_addr;
+static socklen_t cfg_alen;
+
+static uint64_t gettime_ns(clockid_t clock)
 {
 	struct timespec ts;
 
-	if (clock_gettime(cfg_clockid, &ts))
+	if (clock_gettime(clock, &ts))
 		error(1, errno, "gettime");
 
 	return ts.tv_sec * (1000ULL * 1000 * 1000) + ts.tv_nsec;
@@ -75,6 +84,8 @@ static void do_send_one(int fdt, struct timed_send *ts)
 
 	msg.msg_iov = &iov;
 	msg.msg_iovlen = 1;
+	msg.msg_name = (struct sockaddr *)&cfg_dst_addr;
+	msg.msg_namelen = cfg_alen;
 
 	if (ts->delay_us >= 0) {
 		memset(control, 0, sizeof(control));
@@ -82,6 +93,8 @@ static void do_send_one(int fdt, struct timed_send *ts)
 		msg.msg_controllen = sizeof(control);
 
 		tdeliver = glob_tstart + ts->delay_us * 1000;
+		tdeliver_max = tdeliver_max > tdeliver ?
+			       tdeliver_max : tdeliver;
 
 		cm = CMSG_FIRSTHDR(&msg);
 		cm->cmsg_level = SOL_SOCKET;
@@ -98,7 +111,7 @@ static void do_send_one(int fdt, struct timed_send *ts)
 
 }
 
-static bool do_recv_one(int fdr, struct timed_send *ts)
+static void do_recv_one(int fdr, struct timed_send *ts)
 {
 	int64_t tstop, texpect;
 	char rbuf[2];
@@ -106,13 +119,13 @@ static bool do_recv_one(int fdr, struct timed_send *ts)
 
 	ret = recv(fdr, rbuf, sizeof(rbuf), 0);
 	if (ret == -1 && errno == EAGAIN)
-		return true;
+		error(1, EAGAIN, "recv: timeout");
 	if (ret == -1)
 		error(1, errno, "read");
 	if (ret != 1)
 		error(1, 0, "read: %dB", ret);
 
-	tstop = (gettime_ns() - glob_tstart) / 1000;
+	tstop = (gettime_ns(cfg_clockid) - glob_tstart) / 1000;
 	texpect = ts->delay_us >= 0 ? ts->delay_us : 0;
 
 	fprintf(stderr, "payload:%c delay:%lld expected:%lld (us)\n",
@@ -121,10 +134,11 @@ static bool do_recv_one(int fdr, struct timed_send *ts)
 	if (rbuf[0] != ts->data)
 		error(1, 0, "payload mismatch. expected %c", ts->data);
 
-	if (llabs(tstop - texpect) > cfg_variance_us)
-		error(1, 0, "exceeds variance (%d us)", cfg_variance_us);
-
-	return false;
+	if (llabs(tstop - texpect) > cfg_variance_us) {
+		fprintf(stderr, "exceeds variance (%d us)\n", cfg_variance_us);
+		if (!getenv("KSFT_MACHINE_SLOW"))
+			exit(1);
+	}
 }
 
 static void do_recv_verify_empty(int fdr)
@@ -137,18 +151,18 @@ static void do_recv_verify_empty(int fdr)
 		error(1, 0, "recv: not empty as expected (%d, %d)", ret, errno);
 }
 
-static void do_recv_errqueue_timeout(int fdt)
+static int do_recv_errqueue_timeout(int fdt)
 {
 	char control[CMSG_SPACE(sizeof(struct sock_extended_err)) +
 		     CMSG_SPACE(sizeof(struct sockaddr_in6))] = {0};
 	char data[sizeof(struct ethhdr) + sizeof(struct ipv6hdr) +
 		  sizeof(struct udphdr) + 1];
 	struct sock_extended_err *err;
+	int ret, num_tstamp = 0;
 	struct msghdr msg = {0};
 	struct iovec iov = {0};
 	struct cmsghdr *cm;
 	int64_t tstamp = 0;
-	int ret;
 
 	iov.iov_base = data;
 	iov.iov_len = sizeof(data);
@@ -196,7 +210,7 @@ static void do_recv_errqueue_timeout(int fdt)
 		default:
 			error(1, 0, "errqueue: errno %u code %u\n",
 			      err->ee_errno, err->ee_code);
-		};
+		}
 
 		tstamp = ((int64_t) err->ee_data) << 32 | err->ee_info;
 		tstamp -= (int64_t) glob_tstart;
@@ -206,9 +220,47 @@ static void do_recv_errqueue_timeout(int fdt)
 
 		msg.msg_flags = 0;
 		msg.msg_controllen = sizeof(control);
+		num_tstamp++;
 	}
 
-	error(1, 0, "recv: timeout");
+	return num_tstamp;
+}
+
+static void recv_errqueue_msgs(int fdt)
+{
+	struct pollfd pfd = { .fd = fdt, .events = POLLERR };
+	const int timeout_ms = 10;
+	int ret, num_tstamp = 0;
+
+	do {
+		ret = poll(&pfd, 1, timeout_ms);
+		if (ret == -1)
+			error(1, errno, "poll");
+
+		if (ret && (pfd.revents & POLLERR))
+			num_tstamp += do_recv_errqueue_timeout(fdt);
+
+		if (num_tstamp == cfg_num_pkt)
+			break;
+
+	} while (gettime_ns(cfg_clockid) < tdeliver_max);
+}
+
+static void start_time_wait(void)
+{
+	uint64_t now;
+	int err;
+
+	if (!cfg_start_time_ns)
+		return;
+
+	now = gettime_ns(CLOCK_REALTIME);
+	if (cfg_start_time_ns < now)
+		return;
+
+	err = usleep((cfg_start_time_ns - now) / 1000);
+	if (err)
+		error(1, errno, "usleep");
 }
 
 static void setsockopt_txtime(int fd)
@@ -245,6 +297,10 @@ static int setup_tx(struct sockaddr *addr, socklen_t alen)
 
 	setsockopt_txtime(fd);
 
+	if (cfg_mark &&
+	    setsockopt(fd, SOL_SOCKET, SO_MARK, &cfg_mark, sizeof(cfg_mark)))
+		error(1, errno, "setsockopt mark");
+
 	return fd;
 }
 
@@ -266,31 +322,70 @@ static int setup_rx(struct sockaddr *addr, socklen_t alen)
 	return fd;
 }
 
-static void do_test(struct sockaddr *addr, socklen_t alen)
+static void do_test_tx(struct sockaddr *addr, socklen_t alen)
 {
-	int fdt, fdr, i;
+	int fdt, i;
 
 	fprintf(stderr, "\nSO_TXTIME ipv%c clock %s\n",
 			addr->sa_family == PF_INET ? '4' : '6',
 			cfg_clockid == CLOCK_TAI ? "tai" : "monotonic");
 
 	fdt = setup_tx(addr, alen);
-	fdr = setup_rx(addr, alen);
 
-	glob_tstart = gettime_ns();
+	start_time_wait();
+	glob_tstart = gettime_ns(cfg_clockid);
 
 	for (i = 0; i < cfg_num_pkt; i++)
-		do_send_one(fdt, &cfg_in[i]);
+		do_send_one(fdt, &cfg_buf[i]);
+
+	recv_errqueue_msgs(fdt);
+
+	if (close(fdt))
+		error(1, errno, "close t");
+}
+
+static void do_test_rx(struct sockaddr *addr, socklen_t alen)
+{
+	int fdr, i;
+
+	fdr = setup_rx(addr, alen);
+
+	start_time_wait();
+	glob_tstart = gettime_ns(cfg_clockid);
+
 	for (i = 0; i < cfg_num_pkt; i++)
-		if (do_recv_one(fdr, &cfg_out[i]))
-			do_recv_errqueue_timeout(fdt);
+		do_recv_one(fdr, &cfg_buf[i]);
 
 	do_recv_verify_empty(fdr);
 
 	if (close(fdr))
 		error(1, errno, "close r");
-	if (close(fdt))
-		error(1, errno, "close t");
+}
+
+static void setup_sockaddr(int domain, const char *str_addr,
+			   struct sockaddr_storage *sockaddr)
+{
+	struct sockaddr_in6 *addr6 = (void *) sockaddr;
+	struct sockaddr_in *addr4 = (void *) sockaddr;
+
+	switch (domain) {
+	case PF_INET:
+		memset(addr4, 0, sizeof(*addr4));
+		addr4->sin_family = AF_INET;
+		addr4->sin_port = htons(cfg_port);
+		if (str_addr &&
+		    inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1)
+			error(1, 0, "ipv4 parse error: %s", str_addr);
+		break;
+	case PF_INET6:
+		memset(addr6, 0, sizeof(*addr6));
+		addr6->sin6_family = AF_INET6;
+		addr6->sin6_port = htons(cfg_port);
+		if (str_addr &&
+		    inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1)
+			error(1, 0, "ipv6 parse error: %s", str_addr);
+		break;
+	}
 }
 
 static int parse_io(const char *optarg, struct timed_send *array)
@@ -323,17 +418,46 @@ static int parse_io(const char *optarg, struct timed_send *array)
 	return aoff / 2;
 }
 
+static void usage(const char *progname)
+{
+	fprintf(stderr, "\nUsage: %s [options] <payload>\n"
+			"Options:\n"
+			"  -4            only IPv4\n"
+			"  -6            only IPv6\n"
+			"  -c <clock>    monotonic or tai (default)\n"
+			"  -D <addr>     destination IP address (server)\n"
+			"  -S <addr>     source IP address (client)\n"
+			"  -r            run rx mode\n"
+			"  -t <nsec>     start time (UTC nanoseconds)\n"
+			"  -m <mark>     socket mark\n"
+			"\n",
+			progname);
+	exit(1);
+}
+
 static void parse_opts(int argc, char **argv)
 {
-	int c, ilen, olen;
+	char *daddr = NULL, *saddr = NULL;
+	int domain = PF_UNSPEC;
+	int c;
 
-	while ((c = getopt(argc, argv, "46c:")) != -1) {
+	while ((c = getopt(argc, argv, "46c:S:D:rt:m:")) != -1) {
 		switch (c) {
 		case '4':
-			cfg_do_ipv4 = true;
+			if (domain != PF_UNSPEC)
+				error(1, 0, "Pass one of -4 or -6");
+			domain = PF_INET;
+			cfg_alen = sizeof(struct sockaddr_in);
+			cfg_errq_level = SOL_IP;
+			cfg_errq_type = IP_RECVERR;
 			break;
 		case '6':
-			cfg_do_ipv6 = true;
+			if (domain != PF_UNSPEC)
+				error(1, 0, "Pass one of -4 or -6");
+			domain = PF_INET6;
+			cfg_alen = sizeof(struct sockaddr_in6);
+			cfg_errq_level = SOL_IPV6;
+			cfg_errq_type = IPV6_RECVERR;
 			break;
 		case 'c':
 			if (!strcmp(optarg, "tai"))
@@ -344,50 +468,50 @@ static void parse_opts(int argc, char **argv)
 			else
 				error(1, 0, "unknown clock id %s", optarg);
 			break;
+		case 'S':
+			saddr = optarg;
+			break;
+		case 'D':
+			daddr = optarg;
+			break;
+		case 'r':
+			cfg_rx = true;
+			break;
+		case 't':
+			cfg_start_time_ns = strtoll(optarg, NULL, 0);
+			break;
+		case 'm':
+			cfg_mark = strtol(optarg, NULL, 0);
+			break;
 		default:
-			error(1, 0, "parse error at %d", optind);
+			usage(argv[0]);
 		}
 	}
 
-	if (argc - optind != 2)
-		error(1, 0, "Usage: %s [-46] -c <clock> <in> <out>", argv[0]);
+	if (argc - optind != 1)
+		usage(argv[0]);
 
-	ilen = parse_io(argv[optind], cfg_in);
-	olen = parse_io(argv[optind + 1], cfg_out);
-	if (ilen != olen)
-		error(1, 0, "i/o streams len mismatch (%d, %d)\n", ilen, olen);
-	cfg_num_pkt = ilen;
+	if (domain == PF_UNSPEC)
+		error(1, 0, "Pass one of -4 or -6");
+	if (!daddr)
+		error(1, 0, "-D <server addr> required\n");
+	if (!cfg_rx && !saddr)
+		error(1, 0, "-S <client addr> required\n");
+
+	setup_sockaddr(domain, daddr, &cfg_dst_addr);
+	setup_sockaddr(domain, saddr, &cfg_src_addr);
+
+	cfg_num_pkt = parse_io(argv[optind], cfg_buf);
 }
 
 int main(int argc, char **argv)
 {
 	parse_opts(argc, argv);
 
-	if (cfg_do_ipv6) {
-		struct sockaddr_in6 addr6 = {0};
-
-		addr6.sin6_family = AF_INET6;
-		addr6.sin6_port = htons(cfg_port);
-		addr6.sin6_addr = in6addr_loopback;
-
-		cfg_errq_level = SOL_IPV6;
-		cfg_errq_type = IPV6_RECVERR;
-
-		do_test((void *)&addr6, sizeof(addr6));
-	}
-
-	if (cfg_do_ipv4) {
-		struct sockaddr_in addr4 = {0};
-
-		addr4.sin_family = AF_INET;
-		addr4.sin_port = htons(cfg_port);
-		addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
-
-		cfg_errq_level = SOL_IP;
-		cfg_errq_type = IP_RECVERR;
-
-		do_test((void *)&addr4, sizeof(addr4));
-	}
+	if (cfg_rx)
+		do_test_rx((void *)&cfg_dst_addr, cfg_alen);
+	else
+		do_test_tx((void *)&cfg_src_addr, cfg_alen);
 
 	return 0;
 }
diff --git a/tools/testing/selftests/net/so_txtime.sh b/tools/testing/selftests/net/so_txtime.sh
index 3f7800eaecb1..5e861ad32a42 100755
--- a/tools/testing/selftests/net/so_txtime.sh
+++ b/tools/testing/selftests/net/so_txtime.sh
@@ -3,34 +3,108 @@
 #
 # Regression tests for the SO_TXTIME interface
 
-# Run in network namespace
-if [[ $# -eq 0 ]]; then
-	if ! ./in_netns.sh $0 __subprocess; then
-		# test is time sensitive, can be flaky
-		echo "test failed: retry once"
-		./in_netns.sh $0 __subprocess
+set -e
+
+readonly ksft_skip=4
+readonly DEV="veth0"
+readonly BIN="./so_txtime"
+
+readonly RAND="$(mktemp -u XXXXXX)"
+readonly NSPREFIX="ns-${RAND}"
+readonly NS1="${NSPREFIX}1"
+readonly NS2="${NSPREFIX}2"
+
+readonly SADDR4='192.168.1.1'
+readonly DADDR4='192.168.1.2'
+readonly SADDR6='fd::1'
+readonly DADDR6='fd::2'
+
+cleanup() {
+	ip netns del "${NS2}"
+	ip netns del "${NS1}"
+}
+
+trap cleanup EXIT
+
+# Create virtual ethernet pair between network namespaces
+ip netns add "${NS1}"
+ip netns add "${NS2}"
+
+ip link add "${DEV}" netns "${NS1}" type veth \
+  peer name "${DEV}" netns "${NS2}"
+
+# Bring the devices up
+ip -netns "${NS1}" link set "${DEV}" up
+ip -netns "${NS2}" link set "${DEV}" up
+
+# Set fixed MAC addresses on the devices
+ip -netns "${NS1}" link set dev "${DEV}" address 02:02:02:02:02:02
+ip -netns "${NS2}" link set dev "${DEV}" address 06:06:06:06:06:06
+
+# Add fixed IP addresses to the devices
+ip -netns "${NS1}" addr add 192.168.1.1/24 dev "${DEV}"
+ip -netns "${NS2}" addr add 192.168.1.2/24 dev "${DEV}"
+ip -netns "${NS1}" addr add       fd::1/64 dev "${DEV}" nodad
+ip -netns "${NS2}" addr add       fd::2/64 dev "${DEV}" nodad
+
+run_test() {
+	local readonly IP="$1"
+	local readonly CLOCK="$2"
+	local readonly TXARGS="$3"
+	local readonly RXARGS="$4"
+
+	if [[ "${IP}" == "4" ]]; then
+		local readonly SADDR="${SADDR4}"
+		local readonly DADDR="${DADDR4}"
+	elif [[ "${IP}" == "6" ]]; then
+		local readonly SADDR="${SADDR6}"
+		local readonly DADDR="${DADDR6}"
+	else
+		echo "Invalid IP version ${IP}"
+		exit 1
 	fi
 
-	exit $?
-fi
+	local readonly START="$(date +%s%N --date="+ 0.1 seconds")"
 
-set -e
+	ip netns exec "${NS2}" "${BIN}" -"${IP}" -c "${CLOCK}" -t "${START}" -S "${SADDR}" -D "${DADDR}" "${RXARGS}" -r &
+	ip netns exec "${NS1}" "${BIN}" -"${IP}" -c "${CLOCK}" -t "${START}" -S "${SADDR}" -D "${DADDR}" "${TXARGS}"
+	wait "$!"
+}
 
-tc qdisc add dev lo root fq
-./so_txtime -4 -6 -c mono a,-1 a,-1
-./so_txtime -4 -6 -c mono a,0 a,0
-./so_txtime -4 -6 -c mono a,10 a,10
-./so_txtime -4 -6 -c mono a,10,b,20 a,10,b,20
-./so_txtime -4 -6 -c mono a,20,b,10 b,20,a,20
-
-if tc qdisc replace dev lo root etf clockid CLOCK_TAI delta 400000; then
-	! ./so_txtime -4 -6 -c tai a,-1 a,-1
-	! ./so_txtime -4 -6 -c tai a,0 a,0
-	./so_txtime -4 -6 -c tai a,10 a,10
-	./so_txtime -4 -6 -c tai a,10,b,20 a,10,b,20
-	./so_txtime -4 -6 -c tai a,20,b,10 b,10,a,20
+do_test() {
+	run_test $@
+	[ $? -ne 0 ] && ret=1
+}
+
+do_fail_test() {
+	run_test $@
+	[ $? -eq 0 ] && ret=1
+}
+
+ip netns exec "${NS1}" tc qdisc add dev "${DEV}" root fq
+set +e
+ret=0
+do_test 4 mono a,-1 a,-1
+do_test 6 mono a,0 a,0
+do_test 6 mono a,10 a,10
+do_test 4 mono a,10,b,20 a,10,b,20
+do_test 6 mono a,20,b,10 b,20,a,20
+
+if ip netns exec "${NS1}" tc qdisc replace dev "${DEV}" root etf clockid CLOCK_TAI delta 400000; then
+	do_fail_test 4 tai a,-1 a,-1
+	do_fail_test 6 tai a,0 a,0
+	do_test 6 tai a,10 a,10
+	do_test 4 tai a,10,b,20 a,10,b,20
+	do_test 6 tai a,20,b,10 b,10,a,20
 else
 	echo "tc ($(tc -V)) does not support qdisc etf. skipping"
+	[ $ret -eq 0 ] && ret=$ksft_skip
 fi
 
-echo OK. All tests passed
+if [ $ret -eq 0 ]; then
+	echo OK. All tests passed
+elif [[ $ret -ne $ksft_skip && -n "$KSFT_MACHINE_SLOW" ]]; then
+	echo "Ignoring errors due to slow environment" 1>&2
+	ret=0
+fi
+exit $ret
diff --git a/tools/testing/selftests/net/socket.c b/tools/testing/selftests/net/socket.c
index afca1ead677f..db1aeb8c5d1e 100644
--- a/tools/testing/selftests/net/socket.c
+++ b/tools/testing/selftests/net/socket.c
@@ -7,6 +7,8 @@
 #include <sys/socket.h>
 #include <netinet/in.h>
 
+#include "../kselftest.h"
+
 struct socket_testcase {
 	int	domain;
 	int	type;
@@ -31,7 +33,6 @@ static struct socket_testcase tests[] = {
 	{ AF_INET, SOCK_STREAM, IPPROTO_UDP, -EPROTONOSUPPORT, 1  },
 };
 
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
 #define ERR_STRING_SZ	64
 
 static int run_tests(void)
diff --git a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
new file mode 100755
index 000000000000..02d617040793
--- /dev/null
+++ b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
@@ -0,0 +1,573 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# author: Andrea Mayer <andrea.mayer@uniroma2.it>
+# author: Paolo Lungaroni <paolo.lungaroni@uniroma2.it>
+
+# This test is designed for evaluating the new SRv6 End.DT46 Behavior used for
+# implementing IPv4/IPv6 L3 VPN use cases.
+#
+# The current SRv6 code in the Linux kernel only implements SRv6 End.DT4 and
+# End.DT6 Behaviors which can be used respectively to support IPv4-in-IPv6 and
+# IPv6-in-IPv6 VPNs. With End.DT4 and End.DT6 it is not possible to create a
+# single SRv6 VPN tunnel to carry both IPv4 and IPv6 traffic.
+# The SRv6 End.DT46 Behavior implementation is meant to support the
+# decapsulation of IPv4 and IPv6 traffic coming from a single SRv6 tunnel.
+# Therefore, the SRv6 End.DT46 Behavior in the Linux kernel greatly simplifies
+# the setup and operations of SRv6 VPNs.
+#
+# Hereafter a network diagram is shown, where two different tenants (named 100
+# and 200) offer IPv4/IPv6 L3 VPN services allowing hosts to communicate with
+# each other across an IPv6 network.
+#
+# Only hosts belonging to the same tenant (and to the same VPN) can communicate
+# with each other. Instead, the communication among hosts of different tenants
+# is forbidden.
+# In other words, hosts hs-t100-1 and hs-t100-2 are connected through the
+# IPv4/IPv6 L3 VPN of tenant 100 while hs-t200-3 and hs-t200-4 are connected
+# using the IPv4/IPv6 L3 VPN of tenant 200. Cross connection between tenant 100
+# and tenant 200 is forbidden and thus, for example, hs-t100-1 cannot reach
+# hs-t200-3 and vice versa.
+#
+# Routers rt-1 and rt-2 implement IPv4/IPv6 L3 VPN services leveraging the SRv6
+# architecture. The key components for such VPNs are: a) SRv6 Encap behavior,
+# b) SRv6 End.DT46 Behavior and c) VRF.
+#
+# To explain how an IPv4/IPv6 L3 VPN based on SRv6 works, let us briefly
+# consider an example where, within the same domain of tenant 100, the host
+# hs-t100-1 pings the host hs-t100-2.
+#
+# First of all, L2 reachability of the host hs-t100-2 is taken into account by
+# the router rt-1 which acts as a arp/ndp proxy.
+#
+# When the host hs-t100-1 sends an IPv6 or IPv4 packet destined to hs-t100-2,
+# the router rt-1 receives the packet on the internal veth-t100 interface. Such
+# interface is enslaved to the VRF vrf-100 whose associated table contains the
+# SRv6 Encap route for encapsulating any IPv6 or IPv4 packet in a IPv6 plus the
+# Segment Routing Header (SRH) packet. This packet is sent through the (IPv6)
+# core network up to the router rt-2 that receives it on veth0 interface.
+#
+# The rt-2 router uses the 'localsid' routing table to process incoming
+# IPv6+SRH packets which belong to the VPN of the tenant 100. For each of these
+# packets, the SRv6 End.DT46 Behavior removes the outer IPv6+SRH headers and
+# performs the lookup on the vrf-100 table using the destination address of
+# the decapsulated IPv6 or IPv4 packet. Afterwards, the packet is sent to the
+# host hs-t100-2 through the veth-t100 interface.
+#
+# The ping response follows the same processing but this time the roles of rt-1
+# and rt-2 are swapped.
+#
+# Of course, the IPv4/IPv6 L3 VPN for tenant 200 works exactly as the IPv4/IPv6
+# L3 VPN for tenant 100. In this case, only hosts hs-t200-3 and hs-t200-4 are
+# able to connect with each other.
+#
+#
+# +-------------------+                                   +-------------------+
+# |                   |                                   |                   |
+# |  hs-t100-1 netns  |                                   |  hs-t100-2 netns  |
+# |                   |                                   |                   |
+# |  +-------------+  |                                   |  +-------------+  |
+# |  |    veth0    |  |                                   |  |    veth0    |  |
+# |  |  cafe::1/64 |  |                                   |  |  cafe::2/64 |  |
+# |  | 10.0.0.1/24 |  |                                   |  | 10.0.0.2/24 |  |
+# |  +-------------+  |                                   |  +-------------+  |
+# |        .          |                                   |         .         |
+# +-------------------+                                   +-------------------+
+#          .                                                        .
+#          .                                                        .
+#          .                                                        .
+# +-----------------------------------+   +-----------------------------------+
+# |        .                          |   |                         .         |
+# | +---------------+                 |   |                 +---------------- |
+# | |   veth-t100   |                 |   |                 |   veth-t100   | |
+# | |  cafe::254/64 |                 |   |                 |  cafe::254/64 | |
+# | | 10.0.0.254/24 |    +----------+ |   | +----------+    | 10.0.0.254/24 | |
+# | +-------+-------+    | localsid | |   | | localsid |    +-------+-------- |
+# |         |            |   table  | |   | |   table  |            |         |
+# |    +----+----+       +----------+ |   | +----------+       +----+----+    |
+# |    | vrf-100 |                    |   |                    | vrf-100 |    |
+# |    +---------+     +------------+ |   | +------------+     +---------+    |
+# |                    |   veth0    | |   | |   veth0    |                    |
+# |                    | fd00::1/64 |.|...|.| fd00::2/64 |                    |
+# |    +---------+     +------------+ |   | +------------+     +---------+    |
+# |    | vrf-200 |                    |   |                    | vrf-200 |    |
+# |    +----+----+                    |   |                    +----+----+    |
+# |         |                         |   |                         |         |
+# | +-------+-------+                 |   |                 +-------+-------- |
+# | |   veth-t200   |                 |   |                 |   veth-t200   | |
+# | |  cafe::254/64 |                 |   |                 |  cafe::254/64 | |
+# | | 10.0.0.254/24 |                 |   |                 | 10.0.0.254/24 | |
+# | +---------------+      rt-1 netns |   | rt-2 netns      +---------------- |
+# |        .                          |   |                          .        |
+# +-----------------------------------+   +-----------------------------------+
+#          .                                                         .
+#          .                                                         .
+#          .                                                         .
+#          .                                                         .
+# +-------------------+                                   +-------------------+
+# |        .          |                                   |          .        |
+# |  +-------------+  |                                   |  +-------------+  |
+# |  |    veth0    |  |                                   |  |    veth0    |  |
+# |  |  cafe::3/64 |  |                                   |  |  cafe::4/64 |  |
+# |  | 10.0.0.3/24 |  |                                   |  | 10.0.0.4/24 |  |
+# |  +-------------+  |                                   |  +-------------+  |
+# |                   |                                   |                   |
+# |  hs-t200-3 netns  |                                   |  hs-t200-4 netns  |
+# |                   |                                   |                   |
+# +-------------------+                                   +-------------------+
+#
+#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+# | Network configuration |
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# rt-1: localsid table (table 90)
+# +--------------------------------------------------+
+# |SID              |Action                          |
+# +--------------------------------------------------+
+# |fc00:21:100::6046|apply SRv6 End.DT46 vrftable 100|
+# +--------------------------------------------------+
+# |fc00:21:200::6046|apply SRv6 End.DT46 vrftable 200|
+# +--------------------------------------------------+
+#
+# rt-1: VRF tenant 100 (table 100)
+# +---------------------------------------------------+
+# |host       |Action                                 |
+# +---------------------------------------------------+
+# |cafe::2    |apply seg6 encap segs fc00:12:100::6046|
+# +---------------------------------------------------+
+# |cafe::/64  |forward to dev veth-t100               |
+# +---------------------------------------------------+
+# |10.0.0.2   |apply seg6 encap segs fc00:12:100::6046|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth-t100               |
+# +---------------------------------------------------+
+#
+# rt-1: VRF tenant 200 (table 200)
+# +---------------------------------------------------+
+# |host       |Action                                 |
+# +---------------------------------------------------+
+# |cafe::4    |apply seg6 encap segs fc00:12:200::6046|
+# +---------------------------------------------------+
+# |cafe::/64  |forward to dev veth-t200               |
+# +---------------------------------------------------+
+# |10.0.0.4   |apply seg6 encap segs fc00:12:200::6046|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth-t200               |
+# +---------------------------------------------------+
+#
+#
+# rt-2: localsid table (table 90)
+# +--------------------------------------------------+
+# |SID              |Action                          |
+# +--------------------------------------------------+
+# |fc00:12:100::6046|apply SRv6 End.DT46 vrftable 100|
+# +--------------------------------------------------+
+# |fc00:12:200::6046|apply SRv6 End.DT46 vrftable 200|
+# +--------------------------------------------------+
+#
+# rt-2: VRF tenant 100 (table 100)
+# +---------------------------------------------------+
+# |host       |Action                                 |
+# +---------------------------------------------------+
+# |cafe::1    |apply seg6 encap segs fc00:21:100::6046|
+# +---------------------------------------------------+
+# |cafe::/64  |forward to dev veth-t100               |
+# +---------------------------------------------------+
+# |10.0.0.1   |apply seg6 encap segs fc00:21:100::6046|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth-t100               |
+# +---------------------------------------------------+
+#
+# rt-2: VRF tenant 200 (table 200)
+# +---------------------------------------------------+
+# |host       |Action                                 |
+# +---------------------------------------------------+
+# |cafe::3    |apply seg6 encap segs fc00:21:200::6046|
+# +---------------------------------------------------+
+# |cafe::/64  |forward to dev veth-t200               |
+# +---------------------------------------------------+
+# |10.0.0.3   |apply seg6 encap segs fc00:21:200::6046|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth-t200               |
+# +---------------------------------------------------+
+#
+
+source lib.sh
+
+readonly LOCALSID_TABLE_ID=90
+readonly IPv6_RT_NETWORK=fd00
+readonly IPv6_HS_NETWORK=cafe
+readonly IPv4_HS_NETWORK=10.0.0
+readonly VPN_LOCATOR_SERVICE=fc00
+PING_TIMEOUT_SEC=4
+
+ret=0
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+log_test()
+{
+	local rc=$1
+	local expected=$2
+	local msg="$3"
+
+	if [ ${rc} -eq ${expected} ]; then
+		nsuccess=$((nsuccess+1))
+		printf "\n    TEST: %-60s  [ OK ]\n" "${msg}"
+	else
+		ret=1
+		nfail=$((nfail+1))
+		printf "\n    TEST: %-60s  [FAIL]\n" "${msg}"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+			echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	fi
+}
+
+print_log_test_results()
+{
+	if [ "$TESTS" != "none" ]; then
+		printf "\nTests passed: %3d\n" ${nsuccess}
+		printf "Tests failed: %3d\n"   ${nfail}
+	fi
+}
+
+log_section()
+{
+	echo
+	echo "################################################################################"
+	echo "TEST SECTION: $*"
+	echo "################################################################################"
+}
+
+cleanup()
+{
+	ip link del veth-rt-1 2>/dev/null || true
+	ip link del veth-rt-2 2>/dev/null || true
+
+	cleanup_all_ns
+}
+
+# Setup the basic networking for the routers
+setup_rt_networking()
+{
+	local id=$1
+	eval local nsname=\${rt_${id}}
+
+	ip link set veth-rt-${id} netns ${nsname}
+	ip -netns ${nsname} link set veth-rt-${id} name veth0
+
+	ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+	ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+	ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${id}/64 dev veth0 nodad
+	ip -netns ${nsname} link set veth0 up
+	ip -netns ${nsname} link set lo up
+
+	ip netns exec ${nsname} sysctl -wq net.ipv4.ip_forward=1
+	ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.forwarding=1
+}
+
+setup_hs()
+{
+	local hid=$1
+	local rid=$2
+	local tid=$3
+	eval local hsname=\${hs_t${tid}_${hid}}
+	eval local rtname=\${rt_${rid}}
+	local rtveth=veth-t${tid}
+
+	# set the networking for the host
+	ip netns exec ${hsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+	ip netns exec ${hsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+	# disable the rp_filter otherwise the kernel gets confused about how
+	# to route decap ipv4 packets.
+	ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0
+	ip netns exec ${rtname} sysctl -wq net.ipv4.conf.default.rp_filter=0
+
+	ip -netns ${hsname} link add veth0 type veth peer name ${rtveth}
+	ip -netns ${hsname} link set ${rtveth} netns ${rtname}
+	ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hid}/64 dev veth0 nodad
+	ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hid}/24 dev veth0
+	ip -netns ${hsname} link set veth0 up
+	ip -netns ${hsname} link set lo up
+
+	# configure the VRF for the tenant X on the router which is directly
+	# connected to the source host.
+	ip -netns ${rtname} link add vrf-${tid} type vrf table ${tid}
+	ip -netns ${rtname} link set vrf-${tid} up
+
+	ip netns exec ${rtname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+	ip netns exec ${rtname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+	# enslave the veth-tX interface to the vrf-X in the access router
+	ip -netns ${rtname} link set ${rtveth} master vrf-${tid}
+	ip -netns ${rtname} addr add ${IPv6_HS_NETWORK}::254/64 dev ${rtveth} nodad
+	ip -netns ${rtname} addr add ${IPv4_HS_NETWORK}.254/24 dev ${rtveth}
+	ip -netns ${rtname} link set ${rtveth} up
+
+	ip netns exec ${rtname} sysctl -wq net.ipv6.conf.${rtveth}.proxy_ndp=1
+	ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.proxy_arp=1
+
+	ip netns exec ${rtname} sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
+}
+
+setup_vpn_config()
+{
+	local hssrc=$1
+	local rtsrc=$2
+	local hsdst=$3
+	local rtdst=$4
+	local tid=$5
+
+	eval local rtsrc_name=\${rt_${rtsrc}}
+	eval local rtdst_name=\${rt_${rtdst}}
+	local rtveth=veth-t${tid}
+	local vpn_sid=${VPN_LOCATOR_SERVICE}:${hssrc}${hsdst}:${tid}::6046
+
+	ip -netns ${rtsrc_name} -6 neigh add proxy ${IPv6_HS_NETWORK}::${hsdst} dev ${rtveth}
+
+	# set the encap route for encapsulating packets which arrive from the
+	# host hssrc and destined to the access router rtsrc.
+	ip -netns ${rtsrc_name} -6 route add ${IPv6_HS_NETWORK}::${hsdst}/128 vrf vrf-${tid} \
+		encap seg6 mode encap segs ${vpn_sid} dev veth0
+	ip -netns ${rtsrc_name} -4 route add ${IPv4_HS_NETWORK}.${hsdst}/32 vrf vrf-${tid} \
+		encap seg6 mode encap segs ${vpn_sid} dev veth0
+	ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 vrf vrf-${tid} \
+		via fd00::${rtdst} dev veth0
+
+	# set the decap route for decapsulating packets which arrive from
+	# the rtdst router and destined to the hsdst host.
+	ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 table ${LOCALSID_TABLE_ID} \
+		encap seg6local action End.DT46 vrftable ${tid} dev vrf-${tid}
+
+	# all sids for VPNs start with a common locator which is fc00::/16.
+	# Routes for handling the SRv6 End.DT46 behavior instances are grouped
+	# together in the 'localsid' table.
+	#
+	# NOTE: added only once
+	if [ -z "$(ip -netns ${rtdst_name} -6 rule show | \
+	    grep "to ${VPN_LOCATOR_SERVICE}::/16 lookup ${LOCALSID_TABLE_ID}")" ]; then
+		ip -netns ${rtdst_name} -6 rule add \
+			to ${VPN_LOCATOR_SERVICE}::/16 \
+			lookup ${LOCALSID_TABLE_ID} prio 999
+	fi
+
+	# set default routes to unreachable for both ipv4 and ipv6
+	ip -netns ${rtsrc_name} -6 route add unreachable default metric 4278198272 \
+		vrf vrf-${tid}
+
+	ip -netns ${rtsrc_name} -4 route add unreachable default metric 4278198272 \
+		vrf vrf-${tid}
+}
+
+setup()
+{
+	ip link add veth-rt-1 type veth peer name veth-rt-2
+	# setup the networking for router rt-1 and router rt-2
+	setup_ns rt_1 rt_2
+	setup_rt_networking 1
+	setup_rt_networking 2
+
+	# setup two hosts for the tenant 100.
+	#  - host hs-1 is directly connected to the router rt-1;
+	#  - host hs-2 is directly connected to the router rt-2.
+	setup_ns hs_t100_1 hs_t100_2
+	setup_hs 1 1 100  #args: host router tenant
+	setup_hs 2 2 100
+
+	# setup two hosts for the tenant 200
+	#  - host hs-3 is directly connected to the router rt-1;
+	#  - host hs-4 is directly connected to the router rt-2.
+	setup_ns hs_t200_3 hs_t200_4
+	setup_hs 3 1 200
+	setup_hs 4 2 200
+
+	# setup the IPv4/IPv6 L3 VPN which connects the host hs-t100-1 and host
+	# hs-t100-2 within the same tenant 100.
+	setup_vpn_config 1 1 2 2 100  #args: src_host src_router dst_host dst_router tenant
+	setup_vpn_config 2 2 1 1 100
+
+	# setup the IPv4/IPv6 L3 VPN which connects the host hs-t200-3 and host
+	# hs-t200-4 within the same tenant 200.
+	setup_vpn_config 3 1 4 2 200
+	setup_vpn_config 4 2 3 1 200
+}
+
+check_rt_connectivity()
+{
+	local rtsrc=$1
+	local rtdst=$2
+	eval local nsname=\${rt_${rtsrc}}
+
+	ip netns exec ${nsname} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \
+		>/dev/null 2>&1
+}
+
+check_and_log_rt_connectivity()
+{
+	local rtsrc=$1
+	local rtdst=$2
+
+	check_rt_connectivity ${rtsrc} ${rtdst}
+	log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}"
+}
+
+check_hs_ipv6_connectivity()
+{
+	local hssrc=$1
+	local hsdst=$2
+	local tid=$3
+	eval local nsname=\${hs_t${tid}_${hssrc}}
+
+	ip netns exec ${nsname} ping -c 1 -W ${PING_TIMEOUT_SEC} \
+		${IPv6_HS_NETWORK}::${hsdst} >/dev/null 2>&1
+}
+
+check_hs_ipv4_connectivity()
+{
+	local hssrc=$1
+	local hsdst=$2
+	local tid=$3
+	eval local nsname=\${hs_t${tid}_${hssrc}}
+
+	ip netns exec ${nsname} ping -c 1 -W ${PING_TIMEOUT_SEC} \
+		${IPv4_HS_NETWORK}.${hsdst} >/dev/null 2>&1
+}
+
+check_and_log_hs_connectivity()
+{
+	local hssrc=$1
+	local hsdst=$2
+	local tid=$3
+
+	check_hs_ipv6_connectivity ${hssrc} ${hsdst} ${tid}
+	log_test $? 0 "IPv6 Hosts connectivity: hs-t${tid}-${hssrc} -> hs-t${tid}-${hsdst} (tenant ${tid})"
+
+	check_hs_ipv4_connectivity ${hssrc} ${hsdst} ${tid}
+	log_test $? 0 "IPv4 Hosts connectivity: hs-t${tid}-${hssrc} -> hs-t${tid}-${hsdst} (tenant ${tid})"
+
+}
+
+check_and_log_hs_isolation()
+{
+	local hssrc=$1
+	local tidsrc=$2
+	local hsdst=$3
+	local tiddst=$4
+
+	check_hs_ipv6_connectivity ${hssrc} ${hsdst} ${tidsrc}
+	# NOTE: ping should fail
+	log_test $? 1 "IPv6 Hosts isolation: hs-t${tidsrc}-${hssrc} -X-> hs-t${tiddst}-${hsdst}"
+
+	check_hs_ipv4_connectivity ${hssrc} ${hsdst} ${tidsrc}
+	# NOTE: ping should fail
+	log_test $? 1 "IPv4 Hosts isolation: hs-t${tidsrc}-${hssrc} -X-> hs-t${tiddst}-${hsdst}"
+
+}
+
+
+check_and_log_hs2gw_connectivity()
+{
+	local hssrc=$1
+	local tid=$2
+
+	check_hs_ipv6_connectivity ${hssrc} 254 ${tid}
+	log_test $? 0 "IPv6 Hosts connectivity: hs-t${tid}-${hssrc} -> gw (tenant ${tid})"
+
+	check_hs_ipv4_connectivity ${hssrc} 254 ${tid}
+	log_test $? 0 "IPv4 Hosts connectivity: hs-t${tid}-${hssrc} -> gw (tenant ${tid})"
+
+}
+
+router_tests()
+{
+	log_section "IPv6 routers connectivity test"
+
+	check_and_log_rt_connectivity 1 2
+	check_and_log_rt_connectivity 2 1
+}
+
+host2gateway_tests()
+{
+	log_section "IPv4/IPv6 connectivity test among hosts and gateway"
+
+	check_and_log_hs2gw_connectivity 1 100
+	check_and_log_hs2gw_connectivity 2 100
+
+	check_and_log_hs2gw_connectivity 3 200
+	check_and_log_hs2gw_connectivity 4 200
+}
+
+host_vpn_tests()
+{
+	log_section "SRv6 VPN connectivity test among hosts in the same tenant"
+
+	check_and_log_hs_connectivity 1 2 100
+	check_and_log_hs_connectivity 2 1 100
+
+	check_and_log_hs_connectivity 3 4 200
+	check_and_log_hs_connectivity 4 3 200
+}
+
+host_vpn_isolation_tests()
+{
+	local i
+	local j
+	local k
+	local tmp
+	local l1="1 2"
+	local l2="3 4"
+	local t1=100
+	local t2=200
+
+	log_section "SRv6 VPN isolation test among hosts in different tentants"
+
+	for k in 0 1; do
+		for i in ${l1}; do
+			for j in ${l2}; do
+				check_and_log_hs_isolation ${i} ${t1} ${j} ${t2}
+			done
+		done
+
+		# let us test the reverse path
+		tmp="${l1}"; l1="${l2}"; l2="${tmp}"
+		tmp=${t1}; t1=${t2}; t2=${tmp}
+	done
+}
+
+if [ "$(id -u)" -ne 0 ];then
+	echo "SKIP: Need root privileges"
+	exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+modprobe vrf &>/dev/null
+if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
+        echo "SKIP: vrf sysctl does not exist"
+        exit $ksft_skip
+fi
+
+cleanup &>/dev/null
+
+setup
+
+router_tests
+host2gateway_tests
+host_vpn_tests
+host_vpn_isolation_tests
+
+print_log_test_results
+
+cleanup &>/dev/null
+
+exit ${ret}
diff --git a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
new file mode 100755
index 000000000000..79fb81e63c59
--- /dev/null
+++ b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
@@ -0,0 +1,496 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# author: Andrea Mayer <andrea.mayer@uniroma2.it>
+
+# This test is designed for evaluating the new SRv6 End.DT4 behavior used for
+# implementing IPv4 L3 VPN use cases.
+#
+# Hereafter a network diagram is shown, where two different tenants (named 100
+# and 200) offer IPv4 L3 VPN services allowing hosts to communicate with each
+# other across an IPv6 network.
+#
+# Only hosts belonging to the same tenant (and to the same VPN) can communicate
+# with each other. Instead, the communication among hosts of different tenants
+# is forbidden.
+# In other words, hosts hs-t100-1 and hs-t100-2 are connected through the IPv4
+# L3 VPN of tenant 100 while hs-t200-3 and hs-t200-4 are connected using the
+# IPv4 L3 VPN of tenant 200. Cross connection between tenant 100 and tenant 200
+# is forbidden and thus, for example, hs-t100-1 cannot reach hs-t200-3 and vice
+# versa.
+#
+# Routers rt-1 and rt-2 implement IPv4 L3 VPN services leveraging the SRv6
+# architecture. The key components for such VPNs are: a) SRv6 Encap behavior,
+# b) SRv6 End.DT4 behavior and c) VRF.
+#
+# To explain how an IPv4 L3 VPN based on SRv6 works, let us briefly consider an
+# example where, within the same domain of tenant 100, the host hs-t100-1 pings
+# the host hs-t100-2.
+#
+# First of all, L2 reachability of the host hs-t100-2 is taken into account by
+# the router rt-1 which acts as an arp proxy.
+#
+# When the host hs-t100-1 sends an IPv4 packet destined to hs-t100-2, the
+# router rt-1 receives the packet on the internal veth-t100 interface. Such
+# interface is enslaved to the VRF vrf-100 whose associated table contains the
+# SRv6 Encap route for encapsulating any IPv4 packet in a IPv6 plus the Segment
+# Routing Header (SRH) packet. This packet is sent through the (IPv6) core
+# network up to the router rt-2 that receives it on veth0 interface.
+#
+# The rt-2 router uses the 'localsid' routing table to process incoming
+# IPv6+SRH packets which belong to the VPN of the tenant 100. For each of these
+# packets, the SRv6 End.DT4 behavior removes the outer IPv6+SRH headers and
+# performs the lookup on the vrf-100 table using the destination address of
+# the decapsulated IPv4 packet. Afterwards, the packet is sent to the host
+# hs-t100-2 through the veth-t100 interface.
+#
+# The ping response follows the same processing but this time the role of rt-1
+# and rt-2 are swapped.
+#
+# Of course, the IPv4 L3 VPN for tenant 200 works exactly as the IPv4 L3 VPN
+# for tenant 100. In this case, only hosts hs-t200-3 and hs-t200-4 are able to
+# connect with each other.
+#
+#
+# +-------------------+                                   +-------------------+
+# |                   |                                   |                   |
+# |  hs-t100-1 netns  |                                   |  hs-t100-2 netns  |
+# |                   |                                   |                   |
+# |  +-------------+  |                                   |  +-------------+  |
+# |  |    veth0    |  |                                   |  |    veth0    |  |
+# |  | 10.0.0.1/24 |  |                                   |  | 10.0.0.2/24 |  |
+# |  +-------------+  |                                   |  +-------------+  |
+# |        .          |                                   |         .         |
+# +-------------------+                                   +-------------------+
+#          .                                                        .
+#          .                                                        .
+#          .                                                        .
+# +-----------------------------------+   +-----------------------------------+
+# |        .                          |   |                         .         |
+# | +---------------+                 |   |                 +---------------- |
+# | |   veth-t100   |                 |   |                 |   veth-t100   | |
+# | | 10.0.0.254/24 |    +----------+ |   | +----------+    | 10.0.0.254/24 | |
+# | +-------+-------+    | localsid | |   | | localsid |    +-------+-------- |
+# |         |            |   table  | |   | |   table  |            |         |
+# |    +----+----+       +----------+ |   | +----------+       +----+----+    |
+# |    | vrf-100 |                    |   |                    | vrf-100 |    |
+# |    +---------+     +------------+ |   | +------------+     +---------+    |
+# |                    |   veth0    | |   | |   veth0    |                    |
+# |                    | fd00::1/64 |.|...|.| fd00::2/64 |                    |
+# |    +---------+     +------------+ |   | +------------+     +---------+    |
+# |    | vrf-200 |                    |   |                    | vrf-200 |    |
+# |    +----+----+                    |   |                    +----+----+    |
+# |         |                         |   |                         |         |
+# | +-------+-------+                 |   |                 +-------+-------- |
+# | |   veth-t200   |                 |   |                 |   veth-t200   | |
+# | | 10.0.0.254/24 |                 |   |                 | 10.0.0.254/24 | |
+# | +---------------+      rt-1 netns |   | rt-2 netns      +---------------- |
+# |        .                          |   |                          .        |
+# +-----------------------------------+   +-----------------------------------+
+#          .                                                         .
+#          .                                                         .
+#          .                                                         .
+#          .                                                         .
+# +-------------------+                                   +-------------------+
+# |        .          |                                   |          .        |
+# |  +-------------+  |                                   |  +-------------+  |
+# |  |    veth0    |  |                                   |  |    veth0    |  |
+# |  | 10.0.0.3/24 |  |                                   |  | 10.0.0.4/24 |  |
+# |  +-------------+  |                                   |  +-------------+  |
+# |                   |                                   |                   |
+# |  hs-t200-3 netns  |                                   |  hs-t200-4 netns  |
+# |                   |                                   |                   |
+# +-------------------+                                   +-------------------+
+#
+#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+# | Network configuration |
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# rt-1: localsid table (table 90)
+# +-------------------------------------------------+
+# |SID              |Action                         |
+# +-------------------------------------------------+
+# |fc00:21:100::6004|apply SRv6 End.DT4 vrftable 100|
+# +-------------------------------------------------+
+# |fc00:21:200::6004|apply SRv6 End.DT4 vrftable 200|
+# +-------------------------------------------------+
+#
+# rt-1: VRF tenant 100 (table 100)
+# +---------------------------------------------------+
+# |host       |Action                                 |
+# +---------------------------------------------------+
+# |10.0.0.2   |apply seg6 encap segs fc00:12:100::6004|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth_t100               |
+# +---------------------------------------------------+
+#
+# rt-1: VRF tenant 200 (table 200)
+# +---------------------------------------------------+
+# |host       |Action                                 |
+# +---------------------------------------------------+
+# |10.0.0.4   |apply seg6 encap segs fc00:12:200::6004|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth_t200               |
+# +---------------------------------------------------+
+#
+#
+# rt-2: localsid table (table 90)
+# +-------------------------------------------------+
+# |SID              |Action                         |
+# +-------------------------------------------------+
+# |fc00:12:100::6004|apply SRv6 End.DT4 vrftable 100|
+# +-------------------------------------------------+
+# |fc00:12:200::6004|apply SRv6 End.DT4 vrftable 200|
+# +-------------------------------------------------+
+#
+# rt-2: VRF tenant 100 (table 100)
+# +---------------------------------------------------+
+# |host       |Action                                 |
+# +---------------------------------------------------+
+# |10.0.0.1   |apply seg6 encap segs fc00:21:100::6004|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth_t100               |
+# +---------------------------------------------------+
+#
+# rt-2: VRF tenant 200 (table 200)
+# +---------------------------------------------------+
+# |host       |Action                                 |
+# +---------------------------------------------------+
+# |10.0.0.3   |apply seg6 encap segs fc00:21:200::6004|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth_t200               |
+# +---------------------------------------------------+
+#
+
+source lib.sh
+
+readonly LOCALSID_TABLE_ID=90
+readonly IPv6_RT_NETWORK=fd00
+readonly IPv4_HS_NETWORK=10.0.0
+readonly VPN_LOCATOR_SERVICE=fc00
+PING_TIMEOUT_SEC=4
+
+ret=0
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+log_test()
+{
+	local rc=$1
+	local expected=$2
+	local msg="$3"
+
+	if [ ${rc} -eq ${expected} ]; then
+		nsuccess=$((nsuccess+1))
+		printf "\n    TEST: %-60s  [ OK ]\n" "${msg}"
+	else
+		ret=1
+		nfail=$((nfail+1))
+		printf "\n    TEST: %-60s  [FAIL]\n" "${msg}"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+			echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	fi
+}
+
+print_log_test_results()
+{
+	if [ "$TESTS" != "none" ]; then
+		printf "\nTests passed: %3d\n" ${nsuccess}
+		printf "Tests failed: %3d\n"   ${nfail}
+	fi
+}
+
+log_section()
+{
+	echo
+	echo "################################################################################"
+	echo "TEST SECTION: $*"
+	echo "################################################################################"
+}
+
+cleanup()
+{
+	ip link del veth-rt-1 2>/dev/null || true
+	ip link del veth-rt-2 2>/dev/null || true
+
+	cleanup_all_ns
+}
+
+# Setup the basic networking for the routers
+setup_rt_networking()
+{
+	local id=$1
+	eval local nsname=\${rt_${id}}
+
+	ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+	ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+	ip link set veth-rt-${id} netns ${nsname}
+	ip -netns ${nsname} link set veth-rt-${id} name veth0
+
+	ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${id}/64 dev veth0 nodad
+	ip -netns ${nsname} link set veth0 up
+	ip -netns ${nsname} link set lo up
+
+	ip netns exec ${nsname} sysctl -wq net.ipv4.ip_forward=1
+	ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.forwarding=1
+}
+
+setup_hs()
+{
+	local hid=$1
+	local rid=$2
+	local tid=$3
+	eval local hsname=\${hs_t${tid}_${hid}}
+	eval local rtname=\${rt_${rid}}
+	local rtveth=veth-t${tid}
+
+	# disable the rp_filter otherwise the kernel gets confused about how
+	# to route decap ipv4 packets.
+	ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0
+	ip netns exec ${rtname} sysctl -wq net.ipv4.conf.default.rp_filter=0
+
+	ip -netns ${hsname} link add veth0 type veth peer name ${rtveth}
+	ip -netns ${hsname} link set ${rtveth} netns ${rtname}
+	ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hid}/24 dev veth0
+	ip -netns ${hsname} link set veth0 up
+	ip -netns ${hsname} link set lo up
+
+	# configure the VRF for the tenant X on the router which is directly
+	# connected to the source host.
+	ip -netns ${rtname} link add vrf-${tid} type vrf table ${tid}
+	ip -netns ${rtname} link set vrf-${tid} up
+
+	# enslave the veth-tX interface to the vrf-X in the access router
+	ip -netns ${rtname} link set ${rtveth} master vrf-${tid}
+	ip -netns ${rtname} addr add ${IPv4_HS_NETWORK}.254/24 dev ${rtveth}
+	ip -netns ${rtname} link set ${rtveth} up
+
+	ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.proxy_arp=1
+
+	ip netns exec ${rtname} sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
+}
+
+setup_vpn_config()
+{
+	local hssrc=$1
+	local rtsrc=$2
+	local hsdst=$3
+	local rtdst=$4
+	local tid=$5
+
+	eval local rtsrc_name=\${rt_${rtsrc}}
+	eval local rtdst_name=\${rt_${rtdst}}
+	local vpn_sid=${VPN_LOCATOR_SERVICE}:${hssrc}${hsdst}:${tid}::6004
+
+	# set the encap route for encapsulating packets which arrive from the
+	# host hssrc and destined to the access router rtsrc.
+	ip -netns ${rtsrc_name} -4 route add ${IPv4_HS_NETWORK}.${hsdst}/32 vrf vrf-${tid} \
+		encap seg6 mode encap segs ${vpn_sid} dev veth0
+	ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 vrf vrf-${tid} \
+		via fd00::${rtdst} dev veth0
+
+	# set the decap route for decapsulating packets which arrive from
+	# the rtdst router and destined to the hsdst host.
+	ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 table ${LOCALSID_TABLE_ID} \
+		encap seg6local action End.DT4 vrftable ${tid} dev vrf-${tid}
+
+	# all sids for VPNs start with a common locator which is fc00::/16.
+	# Routes for handling the SRv6 End.DT4 behavior instances are grouped
+	# together in the 'localsid' table.
+	#
+	# NOTE: added only once
+	if [ -z "$(ip -netns ${rtdst_name} -6 rule show | \
+	    grep "to ${VPN_LOCATOR_SERVICE}::/16 lookup ${LOCALSID_TABLE_ID}")" ]; then
+		ip -netns ${rtdst_name} -6 rule add \
+			to ${VPN_LOCATOR_SERVICE}::/16 \
+			lookup ${LOCALSID_TABLE_ID} prio 999
+	fi
+}
+
+setup()
+{
+	ip link add veth-rt-1 type veth peer name veth-rt-2
+	# setup the networking for router rt-1 and router rt-2
+	setup_ns rt_1 rt_2
+	setup_rt_networking 1
+	setup_rt_networking 2
+
+	# setup two hosts for the tenant 100.
+	#  - host hs-1 is directly connected to the router rt-1;
+	#  - host hs-2 is directly connected to the router rt-2.
+	setup_ns hs_t100_1 hs_t100_2
+	setup_hs 1 1 100  #args: host router tenant
+	setup_hs 2 2 100
+
+	# setup two hosts for the tenant 200
+	#  - host hs-3 is directly connected to the router rt-1;
+	#  - host hs-4 is directly connected to the router rt-2.
+	setup_ns hs_t200_3 hs_t200_4
+	setup_hs 3 1 200
+	setup_hs 4 2 200
+
+	# setup the IPv4 L3 VPN which connects the host hs-t100-1 and host
+	# hs-t100-2 within the same tenant 100.
+	setup_vpn_config 1 1 2 2 100  #args: src_host src_router dst_host dst_router tenant
+	setup_vpn_config 2 2 1 1 100
+
+	# setup the IPv4 L3 VPN which connects the host hs-t200-3 and host
+	# hs-t200-4 within the same tenant 200.
+	setup_vpn_config 3 1 4 2 200
+	setup_vpn_config 4 2 3 1 200
+}
+
+check_rt_connectivity()
+{
+	local rtsrc=$1
+	local rtdst=$2
+	eval local nsname=\${rt_${rtsrc}}
+
+	ip netns exec ${nsname} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \
+		>/dev/null 2>&1
+}
+
+check_and_log_rt_connectivity()
+{
+	local rtsrc=$1
+	local rtdst=$2
+
+	check_rt_connectivity ${rtsrc} ${rtdst}
+	log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}"
+}
+
+check_hs_connectivity()
+{
+	local hssrc=$1
+	local hsdst=$2
+	local tid=$3
+	eval local nsname=\${hs_t${tid}_${hssrc}}
+
+	ip netns exec ${nsname} ping -c 1 -W ${PING_TIMEOUT_SEC} \
+		${IPv4_HS_NETWORK}.${hsdst} >/dev/null 2>&1
+}
+
+check_and_log_hs_connectivity()
+{
+	local hssrc=$1
+	local hsdst=$2
+	local tid=$3
+
+	check_hs_connectivity ${hssrc} ${hsdst} ${tid}
+	log_test $? 0 "Hosts connectivity: hs-t${tid}-${hssrc} -> hs-t${tid}-${hsdst} (tenant ${tid})"
+}
+
+check_and_log_hs_isolation()
+{
+	local hssrc=$1
+	local tidsrc=$2
+	local hsdst=$3
+	local tiddst=$4
+
+	check_hs_connectivity ${hssrc} ${hsdst} ${tidsrc}
+	# NOTE: ping should fail
+	log_test $? 1 "Hosts isolation: hs-t${tidsrc}-${hssrc} -X-> hs-t${tiddst}-${hsdst}"
+}
+
+
+check_and_log_hs2gw_connectivity()
+{
+	local hssrc=$1
+	local tid=$2
+
+	check_hs_connectivity ${hssrc} 254 ${tid}
+	log_test $? 0 "Hosts connectivity: hs-t${tid}-${hssrc} -> gw (tenant ${tid})"
+}
+
+router_tests()
+{
+	log_section "IPv6 routers connectivity test"
+
+	check_and_log_rt_connectivity 1 2
+	check_and_log_rt_connectivity 2 1
+}
+
+host2gateway_tests()
+{
+	log_section "IPv4 connectivity test among hosts and gateway"
+
+	check_and_log_hs2gw_connectivity 1 100
+	check_and_log_hs2gw_connectivity 2 100
+
+	check_and_log_hs2gw_connectivity 3 200
+	check_and_log_hs2gw_connectivity 4 200
+}
+
+host_vpn_tests()
+{
+	log_section "SRv6 VPN connectivity test among hosts in the same tenant"
+
+	check_and_log_hs_connectivity 1 2 100
+	check_and_log_hs_connectivity 2 1 100
+
+	check_and_log_hs_connectivity 3 4 200
+	check_and_log_hs_connectivity 4 3 200
+}
+
+host_vpn_isolation_tests()
+{
+	local i
+	local j
+	local k
+	local tmp
+	local l1="1 2"
+	local l2="3 4"
+	local t1=100
+	local t2=200
+
+	log_section "SRv6 VPN isolation test among hosts in different tentants"
+
+	for k in 0 1; do
+		for i in ${l1}; do
+			for j in ${l2}; do
+				check_and_log_hs_isolation ${i} ${t1} ${j} ${t2}
+			done
+		done
+
+		# let us test the reverse path
+		tmp="${l1}"; l1="${l2}"; l2="${tmp}"
+		tmp=${t1}; t1=${t2}; t2=${tmp}
+	done
+}
+
+if [ "$(id -u)" -ne 0 ];then
+	echo "SKIP: Need root privileges"
+	exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+modprobe vrf &>/dev/null
+if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
+        echo "SKIP: vrf sysctl does not exist"
+        exit $ksft_skip
+fi
+
+cleanup &>/dev/null
+
+setup
+
+router_tests
+host2gateway_tests
+host_vpn_tests
+host_vpn_isolation_tests
+
+print_log_test_results
+
+cleanup &>/dev/null
+
+exit ${ret}
diff --git a/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh
new file mode 100755
index 000000000000..e408406d8489
--- /dev/null
+++ b/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh
@@ -0,0 +1,501 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# author: Andrea Mayer <andrea.mayer@uniroma2.it>
+# author: Paolo Lungaroni <paolo.lungaroni@cnit.it>
+
+# This test is designed for evaluating the new SRv6 End.DT6 behavior used for
+# implementing IPv6 L3 VPN use cases.
+#
+# Hereafter a network diagram is shown, where two different tenants (named 100
+# and 200) offer IPv6 L3 VPN services allowing hosts to communicate with each
+# other across an IPv6 network.
+#
+# Only hosts belonging to the same tenant (and to the same VPN) can communicate
+# with each other. Instead, the communication among hosts of different tenants
+# is forbidden.
+# In other words, hosts hs-t100-1 and hs-t100-2 are connected through the IPv6
+# L3 VPN of tenant 100 while hs-t200-3 and hs-t200-4 are connected using the
+# IPv6 L3 VPN of tenant 200. Cross connection between tenant 100 and tenant 200
+# is forbidden and thus, for example, hs-t100-1 cannot reach hs-t200-3 and vice
+# versa.
+#
+# Routers rt-1 and rt-2 implement IPv6 L3 VPN services leveraging the SRv6
+# architecture. The key components for such VPNs are: a) SRv6 Encap behavior,
+# b) SRv6 End.DT6 behavior and c) VRF.
+#
+# To explain how an IPv6 L3 VPN based on SRv6 works, let us briefly consider an
+# example where, within the same domain of tenant 100, the host hs-t100-1 pings
+# the host hs-t100-2.
+#
+# First of all, L2 reachability of the host hs-t100-2 is taken into account by
+# the router rt-1 which acts as a ndp proxy.
+#
+# When the host hs-t100-1 sends an IPv6 packet destined to hs-t100-2, the
+# router rt-1 receives the packet on the internal veth-t100 interface. Such
+# interface is enslaved to the VRF vrf-100 whose associated table contains the
+# SRv6 Encap route for encapsulating any IPv6 packet in a IPv6 plus the Segment
+# Routing Header (SRH) packet. This packet is sent through the (IPv6) core
+# network up to the router rt-2 that receives it on veth0 interface.
+#
+# The rt-2 router uses the 'localsid' routing table to process incoming
+# IPv6+SRH packets which belong to the VPN of the tenant 100. For each of these
+# packets, the SRv6 End.DT6 behavior removes the outer IPv6+SRH headers and
+# performs the lookup on the vrf-100 table using the destination address of
+# the decapsulated IPv6 packet. Afterwards, the packet is sent to the host
+# hs-t100-2 through the veth-t100 interface.
+#
+# The ping response follows the same processing but this time the role of rt-1
+# and rt-2 are swapped.
+#
+# Of course, the IPv6 L3 VPN for tenant 200 works exactly as the IPv6 L3 VPN
+# for tenant 100. In this case, only hosts hs-t200-3 and hs-t200-4 are able to
+# connect with each other.
+#
+#
+# +-------------------+                                   +-------------------+
+# |                   |                                   |                   |
+# |  hs-t100-1 netns  |                                   |  hs-t100-2 netns  |
+# |                   |                                   |                   |
+# |  +-------------+  |                                   |  +-------------+  |
+# |  |    veth0    |  |                                   |  |    veth0    |  |
+# |  |  cafe::1/64 |  |                                   |  |  cafe::2/64 |  |
+# |  +-------------+  |                                   |  +-------------+  |
+# |        .          |                                   |         .         |
+# +-------------------+                                   +-------------------+
+#          .                                                        .
+#          .                                                        .
+#          .                                                        .
+# +-----------------------------------+   +-----------------------------------+
+# |        .                          |   |                         .         |
+# | +---------------+                 |   |                 +---------------- |
+# | |   veth-t100   |                 |   |                 |   veth-t100   | |
+# | |  cafe::254/64 |    +----------+ |   | +----------+    |  cafe::254/64 | |
+# | +-------+-------+    | localsid | |   | | localsid |    +-------+-------- |
+# |         |            |   table  | |   | |   table  |            |         |
+# |    +----+----+       +----------+ |   | +----------+       +----+----+    |
+# |    | vrf-100 |                    |   |                    | vrf-100 |    |
+# |    +---------+     +------------+ |   | +------------+     +---------+    |
+# |                    |   veth0    | |   | |   veth0    |                    |
+# |                    | fd00::1/64 |.|...|.| fd00::2/64 |                    |
+# |    +---------+     +------------+ |   | +------------+     +---------+    |
+# |    | vrf-200 |                    |   |                    | vrf-200 |    |
+# |    +----+----+                    |   |                    +----+----+    |
+# |         |                         |   |                         |         |
+# | +-------+-------+                 |   |                 +-------+-------- |
+# | |   veth-t200   |                 |   |                 |   veth-t200   | |
+# | |  cafe::254/64 |                 |   |                 |  cafe::254/64 | |
+# | +---------------+      rt-1 netns |   | rt-2 netns      +---------------- |
+# |        .                          |   |                          .        |
+# +-----------------------------------+   +-----------------------------------+
+#          .                                                         .
+#          .                                                         .
+#          .                                                         .
+#          .                                                         .
+# +-------------------+                                   +-------------------+
+# |        .          |                                   |          .        |
+# |  +-------------+  |                                   |  +-------------+  |
+# |  |    veth0    |  |                                   |  |    veth0    |  |
+# |  |  cafe::3/64 |  |                                   |  |  cafe::4/64 |  |
+# |  +-------------+  |                                   |  +-------------+  |
+# |                   |                                   |                   |
+# |  hs-t200-3 netns  |                                   |  hs-t200-4 netns  |
+# |                   |                                   |                   |
+# +-------------------+                                   +-------------------+
+#
+#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+# | Network configuration |
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# rt-1: localsid table (table 90)
+# +-------------------------------------------------+
+# |SID              |Action                         |
+# +-------------------------------------------------+
+# |fc00:21:100::6006|apply SRv6 End.DT6 vrftable 100|
+# +-------------------------------------------------+
+# |fc00:21:200::6006|apply SRv6 End.DT6 vrftable 200|
+# +-------------------------------------------------+
+#
+# rt-1: VRF tenant 100 (table 100)
+# +---------------------------------------------------+
+# |host       |Action                                 |
+# +---------------------------------------------------+
+# |cafe::2    |apply seg6 encap segs fc00:12:100::6006|
+# +---------------------------------------------------+
+# |cafe::/64  |forward to dev veth_t100               |
+# +---------------------------------------------------+
+#
+# rt-1: VRF tenant 200 (table 200)
+# +---------------------------------------------------+
+# |host       |Action                                 |
+# +---------------------------------------------------+
+# |cafe::4    |apply seg6 encap segs fc00:12:200::6006|
+# +---------------------------------------------------+
+# |cafe::/64  |forward to dev veth_t200               |
+# +---------------------------------------------------+
+#
+#
+# rt-2: localsid table (table 90)
+# +-------------------------------------------------+
+# |SID              |Action                         |
+# +-------------------------------------------------+
+# |fc00:12:100::6006|apply SRv6 End.DT6 vrftable 100|
+# +-------------------------------------------------+
+# |fc00:12:200::6006|apply SRv6 End.DT6 vrftable 200|
+# +-------------------------------------------------+
+#
+# rt-2: VRF tenant 100 (table 100)
+# +---------------------------------------------------+
+# |host       |Action                                 |
+# +---------------------------------------------------+
+# |cafe::1    |apply seg6 encap segs fc00:21:100::6006|
+# +---------------------------------------------------+
+# |cafe::/64  |forward to dev veth_t100               |
+# +---------------------------------------------------+
+#
+# rt-2: VRF tenant 200 (table 200)
+# +---------------------------------------------------+
+# |host       |Action                                 |
+# +---------------------------------------------------+
+# |cafe::3    |apply seg6 encap segs fc00:21:200::6006|
+# +---------------------------------------------------+
+# |cafe::/64  |forward to dev veth_t200               |
+# +---------------------------------------------------+
+#
+
+source lib.sh
+
+readonly LOCALSID_TABLE_ID=90
+readonly IPv6_RT_NETWORK=fd00
+readonly IPv6_HS_NETWORK=cafe
+readonly VPN_LOCATOR_SERVICE=fc00
+PING_TIMEOUT_SEC=4
+
+ret=0
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+log_test()
+{
+	local rc=$1
+	local expected=$2
+	local msg="$3"
+
+	if [ ${rc} -eq ${expected} ]; then
+		nsuccess=$((nsuccess+1))
+		printf "\n    TEST: %-60s  [ OK ]\n" "${msg}"
+	else
+		ret=1
+		nfail=$((nfail+1))
+		printf "\n    TEST: %-60s  [FAIL]\n" "${msg}"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+			echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	fi
+}
+
+print_log_test_results()
+{
+	if [ "$TESTS" != "none" ]; then
+		printf "\nTests passed: %3d\n" ${nsuccess}
+		printf "Tests failed: %3d\n"   ${nfail}
+	fi
+}
+
+log_section()
+{
+	echo
+	echo "################################################################################"
+	echo "TEST SECTION: $*"
+	echo "################################################################################"
+}
+
+cleanup()
+{
+	ip link del veth-rt-1 2>/dev/null || true
+	ip link del veth-rt-2 2>/dev/null || true
+
+	cleanup_all_ns
+}
+
+# Setup the basic networking for the routers
+setup_rt_networking()
+{
+	local id=$1
+	eval local nsname=\${rt_${id}}
+
+	ip link set veth-rt-${id} netns ${nsname}
+	ip -netns ${nsname} link set veth-rt-${id} name veth0
+
+	ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+	ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+	ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${id}/64 dev veth0 nodad
+	ip -netns ${nsname} link set veth0 up
+	ip -netns ${nsname} link set lo up
+
+	ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.forwarding=1
+}
+
+setup_hs()
+{
+	local hid=$1
+	local rid=$2
+	local tid=$3
+	eval local hsname=\${hs_t${tid}_${hid}}
+	eval local rtname=\${rt_${rid}}
+	local rtveth=veth-t${tid}
+
+	# set the networking for the host
+	ip netns exec ${hsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+	ip netns exec ${hsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+	ip -netns ${hsname} link add veth0 type veth peer name ${rtveth}
+	ip -netns ${hsname} link set ${rtveth} netns ${rtname}
+	ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hid}/64 dev veth0 nodad
+	ip -netns ${hsname} link set veth0 up
+	ip -netns ${hsname} link set lo up
+
+	# configure the VRF for the tenant X on the router which is directly
+	# connected to the source host.
+	ip -netns ${rtname} link add vrf-${tid} type vrf table ${tid}
+	ip -netns ${rtname} link set vrf-${tid} up
+
+	ip netns exec ${rtname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+	ip netns exec ${rtname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+	# enslave the veth-tX interface to the vrf-X in the access router
+	ip -netns ${rtname} link set ${rtveth} master vrf-${tid}
+	ip -netns ${rtname} addr add ${IPv6_HS_NETWORK}::254/64 dev ${rtveth} nodad
+	ip -netns ${rtname} link set ${rtveth} up
+
+	ip netns exec ${rtname} sysctl -wq net.ipv6.conf.${rtveth}.proxy_ndp=1
+
+	ip netns exec ${rtname} sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
+}
+
+setup_vpn_config()
+{
+	local hssrc=$1
+	local rtsrc=$2
+	local hsdst=$3
+	local rtdst=$4
+	local tid=$5
+
+	eval local rtsrc_name=\${rt_${rtsrc}}
+	eval local rtdst_name=\${rt_${rtdst}}
+	local rtveth=veth-t${tid}
+	local vpn_sid=${VPN_LOCATOR_SERVICE}:${hssrc}${hsdst}:${tid}::6006
+
+	ip -netns ${rtsrc_name} -6 neigh add proxy ${IPv6_HS_NETWORK}::${hsdst} dev ${rtveth}
+
+	# set the encap route for encapsulating packets which arrive from the
+	# host hssrc and destined to the access router rtsrc.
+	ip -netns ${rtsrc_name} -6 route add ${IPv6_HS_NETWORK}::${hsdst}/128 vrf vrf-${tid} \
+		encap seg6 mode encap segs ${vpn_sid} dev veth0
+	ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 vrf vrf-${tid} \
+		via fd00::${rtdst} dev veth0
+
+	# set the decap route for decapsulating packets which arrive from
+	# the rtdst router and destined to the hsdst host.
+	ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 table ${LOCALSID_TABLE_ID} \
+		encap seg6local action End.DT6 vrftable ${tid} dev vrf-${tid}
+
+	# all sids for VPNs start with a common locator which is fc00::/16.
+	# Routes for handling the SRv6 End.DT6 behavior instances are grouped
+	# together in the 'localsid' table.
+	#
+	# NOTE: added only once
+	if [ -z "$(ip -netns ${rtdst_name} -6 rule show | \
+	    grep "to ${VPN_LOCATOR_SERVICE}::/16 lookup ${LOCALSID_TABLE_ID}")" ]; then
+		ip -netns ${rtdst_name} -6 rule add \
+			to ${VPN_LOCATOR_SERVICE}::/16 \
+			lookup ${LOCALSID_TABLE_ID} prio 999
+	fi
+}
+
+setup()
+{
+	ip link add veth-rt-1 type veth peer name veth-rt-2
+	# setup the networking for router rt-1 and router rt-2
+	setup_ns rt_1 rt_2
+	setup_rt_networking 1
+	setup_rt_networking 2
+
+	# setup two hosts for the tenant 100.
+	#  - host hs-1 is directly connected to the router rt-1;
+	#  - host hs-2 is directly connected to the router rt-2.
+	setup_ns hs_t100_1 hs_t100_2
+	setup_hs 1 1 100  #args: host router tenant
+	setup_hs 2 2 100
+
+	# setup two hosts for the tenant 200
+	#  - host hs-3 is directly connected to the router rt-1;
+	#  - host hs-4 is directly connected to the router rt-2.
+	setup_ns hs_t200_3 hs_t200_4
+	setup_hs 3 1 200
+	setup_hs 4 2 200
+
+	# setup the IPv6 L3 VPN which connects the host hs-t100-1 and host
+	# hs-t100-2 within the same tenant 100.
+	setup_vpn_config 1 1 2 2 100  #args: src_host src_router dst_host dst_router tenant
+	setup_vpn_config 2 2 1 1 100
+
+	# setup the IPv6 L3 VPN which connects the host hs-t200-3 and host
+	# hs-t200-4 within the same tenant 200.
+	setup_vpn_config 3 1 4 2 200
+	setup_vpn_config 4 2 3 1 200
+}
+
+check_rt_connectivity()
+{
+	local rtsrc=$1
+	local rtdst=$2
+	eval local nsname=\${rt_${rtsrc}}
+
+	ip netns exec ${nsname} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \
+		>/dev/null 2>&1
+}
+
+check_and_log_rt_connectivity()
+{
+	local rtsrc=$1
+	local rtdst=$2
+
+	check_rt_connectivity ${rtsrc} ${rtdst}
+	log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}"
+}
+
+check_hs_connectivity()
+{
+	local hssrc=$1
+	local hsdst=$2
+	local tid=$3
+	eval local nsname=\${hs_t${tid}_${hssrc}}
+
+	ip netns exec ${nsname} ping -c 1 -W ${PING_TIMEOUT_SEC} \
+		${IPv6_HS_NETWORK}::${hsdst} >/dev/null 2>&1
+}
+
+check_and_log_hs_connectivity()
+{
+	local hssrc=$1
+	local hsdst=$2
+	local tid=$3
+
+	check_hs_connectivity ${hssrc} ${hsdst} ${tid}
+	log_test $? 0 "Hosts connectivity: hs-t${tid}-${hssrc} -> hs-t${tid}-${hsdst} (tenant ${tid})"
+}
+
+check_and_log_hs_isolation()
+{
+	local hssrc=$1
+	local tidsrc=$2
+	local hsdst=$3
+	local tiddst=$4
+
+	check_hs_connectivity ${hssrc} ${hsdst} ${tidsrc}
+	# NOTE: ping should fail
+	log_test $? 1 "Hosts isolation: hs-t${tidsrc}-${hssrc} -X-> hs-t${tiddst}-${hsdst}"
+}
+
+
+check_and_log_hs2gw_connectivity()
+{
+	local hssrc=$1
+	local tid=$2
+
+	check_hs_connectivity ${hssrc} 254 ${tid}
+	log_test $? 0 "Hosts connectivity: hs-t${tid}-${hssrc} -> gw (tenant ${tid})"
+}
+
+router_tests()
+{
+	log_section "IPv6 routers connectivity test"
+
+	check_and_log_rt_connectivity 1 2
+	check_and_log_rt_connectivity 2 1
+}
+
+host2gateway_tests()
+{
+	log_section "IPv6 connectivity test among hosts and gateway"
+
+	check_and_log_hs2gw_connectivity 1 100
+	check_and_log_hs2gw_connectivity 2 100
+
+	check_and_log_hs2gw_connectivity 3 200
+	check_and_log_hs2gw_connectivity 4 200
+}
+
+host_vpn_tests()
+{
+	log_section "SRv6 VPN connectivity test among hosts in the same tenant"
+
+	check_and_log_hs_connectivity 1 2 100
+	check_and_log_hs_connectivity 2 1 100
+
+	check_and_log_hs_connectivity 3 4 200
+	check_and_log_hs_connectivity 4 3 200
+}
+
+host_vpn_isolation_tests()
+{
+	local i
+	local j
+	local k
+	local tmp
+	local l1="1 2"
+	local l2="3 4"
+	local t1=100
+	local t2=200
+
+	log_section "SRv6 VPN isolation test among hosts in different tentants"
+
+	for k in 0 1; do
+		for i in ${l1}; do
+			for j in ${l2}; do
+				check_and_log_hs_isolation ${i} ${t1} ${j} ${t2}
+			done
+		done
+
+		# let us test the reverse path
+		tmp="${l1}"; l1="${l2}"; l2="${tmp}"
+		tmp=${t1}; t1=${t2}; t2=${tmp}
+	done
+}
+
+if [ "$(id -u)" -ne 0 ];then
+	echo "SKIP: Need root privileges"
+	exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+modprobe vrf &>/dev/null
+if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
+        echo "SKIP: vrf sysctl does not exist"
+        exit $ksft_skip
+fi
+
+cleanup &>/dev/null
+
+setup
+
+router_tests
+host2gateway_tests
+host_vpn_tests
+host_vpn_isolation_tests
+
+print_log_test_results
+
+cleanup &>/dev/null
+
+exit ${ret}
diff --git a/tools/testing/selftests/net/srv6_end_flavors_test.sh b/tools/testing/selftests/net/srv6_end_flavors_test.sh
new file mode 100755
index 000000000000..50563443a4ad
--- /dev/null
+++ b/tools/testing/selftests/net/srv6_end_flavors_test.sh
@@ -0,0 +1,869 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# author: Andrea Mayer <andrea.mayer@uniroma2.it>
+# author: Paolo Lungaroni <paolo.lungaroni@uniroma2.it>
+#
+# This script is designed to test the support for "flavors" in the SRv6 End
+# behavior.
+#
+# Flavors defined in RFC8986 [1] represent additional operations that can modify
+# or extend the existing SRv6 End, End.X and End.T behaviors. For the sake of
+# convenience, we report the list of flavors described in [1] hereafter:
+#   - Penultimate Segment Pop (PSP);
+#   - Ultimate Segment Pop (USP);
+#   - Ultimate Segment Decapsulation (USD).
+#
+# The End, End.X, and End.T behaviors can support these flavors either
+# individually or in combinations.
+# Currently in this selftest we consider only the PSP flavor for the SRv6 End
+# behavior. However, it is possible to extend the script as soon as other
+# flavors will be supported in the kernel.
+#
+# The purpose of the PSP flavor consists in instructing the penultimate node
+# listed in the SRv6 policy to remove (i.e. pop) the outermost SRH from the IPv6
+# header.
+# A PSP enabled SRv6 End behavior instance processes the SRH by:
+#  - decrementing the Segment Left (SL) value from 1 to 0;
+#  - copying the last SID from the SID List into the IPv6 Destination Address
+#    (DA);
+#  - removing the SRH from the extension headers following the IPv6 header.
+#
+# Once the SRH is removed, the IPv6 packet is forwarded to the destination using
+# the IPv6 DA updated during the PSP operation (i.e. the IPv6 DA corresponding
+# to the last SID carried by the removed SRH).
+#
+# Although the PSP flavor can be set for any SRv6 End behavior instance on any
+# SR node, it will be active only on such behaviors bound to a penultimate SID
+# for a given SRv6 policy.
+#                                                SL=2 SL=1 SL=0
+#                                                  |    |    |
+# For example, given the SRv6 policy (SID List := <X,   Y,   Z>):
+#  - a PSP enabled SRv6 End behavior bound to SID Y will apply the PSP operation
+#    as Segment Left (SL) is 1, corresponding to the Penultimate Segment of the
+#    SID List;
+#  - a PSP enabled SRv6 End behavior bound to SID X will *NOT* apply the PSP
+#    operation as the Segment Left is 2. This behavior instance will apply the
+#    "standard" End packet processing, ignoring the configured PSP flavor at
+#    all.
+#
+# [1] RFC8986: https://datatracker.ietf.org/doc/html/rfc8986
+#
+# Network topology
+# ================
+#
+# The network topology used in this selftest is depicted hereafter, composed by
+# two hosts (hs-1, hs-2) and four routers (rt-1, rt-2, rt-3, rt-4).
+# Hosts hs-1 and hs-2 are connected to routers rt-1 and rt-2, respectively,
+# allowing them to communicate with each other.
+# Traffic exchanged between hs-1 and hs-2 can follow different network paths.
+# The network operator, through specific SRv6 Policies can steer traffic to one
+# path rather than another. In this selftest this is implemented as follows:
+#
+#   i) The SRv6 H.Insert behavior applies SRv6 Policies on traffic received by
+#      connected hosts. It pushes the Segment Routing Header (SRH) after the
+#      IPv6 header. The SRH contains the SID List (i.e. SRv6 Policy) needed for
+#      steering traffic across the segments/waypoints specified in that list;
+#
+#  ii) The SRv6 End behavior advances the active SID in the SID List carried by
+#      the SRH;
+#
+# iii) The PSP enabled SRv6 End behavior is used to remove the SRH when such
+#      behavior is configured on a node bound to the Penultimate Segment carried
+#      by the SID List.
+#
+#                cafe::1                      cafe::2
+#              +--------+                   +--------+
+#              |        |                   |        |
+#              |  hs-1  |                   |  hs-2  |
+#              |        |                   |        |
+#              +---+----+                   +--- +---+
+#     cafe::/64    |                             |      cafe::/64
+#                  |                             |
+#              +---+----+                   +----+---+
+#              |        |  fcf0:0:1:2::/64  |        |
+#              |  rt-1  +-------------------+  rt-2  |
+#              |        |                   |        |
+#              +---+----+                   +----+---+
+#                  |      .               .      |
+#                  |  fcf0:0:1:3::/64   .        |
+#                  |          .       .          |
+#                  |            .   .            |
+#  fcf0:0:1:4::/64 |              .              | fcf0:0:2:3::/64
+#                  |            .   .            |
+#                  |          .       .          |
+#                  |  fcf0:0:2:4::/64   .        |
+#                  |      .               .      |
+#              +---+----+                   +----+---+
+#              |        |                   |        |
+#              |  rt-4  +-------------------+  rt-3  |
+#              |        |  fcf0:0:3:4::/64  |        |
+#              +---+----+                   +----+---+
+#
+# Every fcf0:0:x:y::/64 network interconnects the SRv6 routers rt-x with rt-y in
+# the IPv6 operator network.
+#
+#
+# Local SID table
+# ===============
+#
+# Each SRv6 router is configured with a Local SID table in which SIDs are
+# stored. Considering the given SRv6 router rt-x, at least two SIDs are
+# configured in the Local SID table:
+#
+#   Local SID table for SRv6 router rt-x
+#   +---------------------------------------------------------------------+
+#   |fcff:x::e is associated with the SRv6 End behavior                   |
+#   |fcff:x::ef1 is associated with the SRv6 End behavior with PSP flavor |
+#   +---------------------------------------------------------------------+
+#
+# The fcff::/16 prefix is reserved by the operator for the SIDs. Reachability of
+# SIDs is ensured by proper configuration of the IPv6 operator's network and
+# SRv6 routers.
+#
+#
+# SRv6 Policies
+# =============
+#
+# An SRv6 ingress router applies different SRv6 Policies to the traffic received
+# from connected hosts on the basis of the destination addresses.
+# In case of SRv6 H.Insert behavior, the SRv6 Policy enforcement consists of
+# pushing the SRH (carrying a given SID List) after the existing IPv6 header.
+# Note that in the inserting mode, there is no encapsulation at all.
+#
+#   Before applying an SRv6 Policy using the SRv6 H.Insert behavior
+#   +------+---------+
+#   | IPv6 | Payload |
+#   +------+---------+
+#
+#   After applying an SRv6 Policy using the SRv6 H.Insert behavior
+#   +------+-----+---------+
+#   | IPv6 | SRH | Payload |
+#   +------+-----+---------+
+#
+# Traffic from hs-1 to hs-2
+# -------------------------
+#
+# Packets generated from hs-1 and directed towards hs-2 are
+# handled by rt-1 which applies the following SRv6 Policy:
+#
+#   i.a) IPv6 traffic, SID List=fcff:3::e,fcff:4::ef1,fcff:2::ef1,cafe::2
+#
+# Router rt-1 is configured to enforce the Policy (i.a) through the SRv6
+# H.Insert behavior which pushes the SRH after the existing IPv6 header. This
+# Policy steers the traffic from hs-1 across rt-3, rt-4, rt-2 and finally to the
+# destination hs-2.
+#
+# As the packet reaches the router rt-3, the SRv6 End behavior bound to SID
+# fcff:3::e is triggered. The behavior updates the Segment Left (from SL=3 to
+# SL=2) in the SRH, the IPv6 DA with fcff:4::ef1 and forwards the packet to the
+# next router on the path, i.e. rt-4.
+#
+# When router rt-4 receives the packet, the PSP enabled SRv6 End behavior bound
+# to SID fcff:4::ef1 is executed. Since the SL=2, the PSP operation is *NOT*
+# kicked in and the behavior applies the default End processing: the Segment
+# Left is decreased (from SL=2 to SL=1), the IPv6 DA is updated with the SID
+# fcff:2::ef1 and the packet is forwarded to router rt-2.
+#
+# The PSP enabled SRv6 End behavior on rt-2 is associated with SID fcff:2::ef1
+# and is executed as the packet is received. Because SL=1, the behavior applies
+# the PSP processing on the packet as follows: i) SL is decreased, i.e. from
+# SL=1 to SL=0; ii) last SID (cafe::2) is copied into the IPv6 DA; iii) the
+# outermost SRH is removed from the extension headers following the IPv6 header.
+# Once the PSP processing is completed, the packet is forwarded to the host hs-2
+# (destination).
+#
+# Traffic from hs-2 to hs-1
+# -------------------------
+#
+# Packets generated from hs-2 and directed to hs-1 are handled by rt-2 which
+# applies the following SRv6 Policy:
+#
+#   i.b) IPv6 traffic, SID List=fcff:1::ef1,cafe::1
+#
+# Router rt-2 is configured to enforce the Policy (i.b) through the SRv6
+# H.Insert behavior which pushes the SRH after the existing IPv6 header. This
+# Policy steers the traffic from hs-2 across rt-1 and finally to the
+# destination hs-1
+#
+#
+# When the router rt-1 receives the packet, the PSP enabled SRv6 End behavior
+# associated with the SID fcff:1::ef1 is triggered. Since the SL=1,
+# the PSP operation takes place: i) the SL is decremented; ii) the IPv6 DA is
+# set with the last SID; iii) the SRH is removed from the extension headers
+# after the IPv6 header. At this point, the packet with IPv6 DA=cafe::1 is sent
+# to the destination, i.e. hs-1.
+
+# Kselftest framework requirement - SKIP code is 4.
+readonly ksft_skip=4
+
+readonly RDMSUFF="$(mktemp -u XXXXXXXX)"
+readonly DUMMY_DEVNAME="dum0"
+readonly RT2HS_DEVNAME="veth1"
+readonly LOCALSID_TABLE_ID=90
+readonly IPv6_RT_NETWORK=fcf0:0
+readonly IPv6_HS_NETWORK=cafe
+readonly IPv6_TESTS_ADDR=2001:db8::1
+readonly LOCATOR_SERVICE=fcff
+readonly END_FUNC=000e
+readonly END_PSP_FUNC=0ef1
+
+PING_TIMEOUT_SEC=4
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+# IDs of routers and hosts are initialized during the setup of the testing
+# network
+ROUTERS=''
+HOSTS=''
+
+SETUP_ERR=1
+
+ret=${ksft_skip}
+nsuccess=0
+nfail=0
+
+log_test()
+{
+	local rc="$1"
+	local expected="$2"
+	local msg="$3"
+
+	if [ "${rc}" -eq "${expected}" ]; then
+		nsuccess=$((nsuccess+1))
+		printf "\n    TEST: %-60s  [ OK ]\n" "${msg}"
+	else
+		ret=1
+		nfail=$((nfail+1))
+		printf "\n    TEST: %-60s  [FAIL]\n" "${msg}"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+			echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	fi
+}
+
+print_log_test_results()
+{
+	printf "\nTests passed: %3d\n" "${nsuccess}"
+	printf "Tests failed: %3d\n"   "${nfail}"
+
+	# when a test fails, the value of 'ret' is set to 1 (error code).
+	# Conversely, when all tests are passed successfully, the 'ret' value
+	# is set to 0 (success code).
+	if [ "${ret}" -ne 1 ]; then
+		ret=0
+	fi
+}
+
+log_section()
+{
+	echo
+	echo "################################################################################"
+	echo "TEST SECTION: $*"
+	echo "################################################################################"
+}
+
+test_command_or_ksft_skip()
+{
+	local cmd="$1"
+
+	if [ ! -x "$(command -v "${cmd}")" ]; then
+		echo "SKIP: Could not run test without \"${cmd}\" tool";
+		exit "${ksft_skip}"
+	fi
+}
+
+get_nodename()
+{
+	local name="$1"
+
+	echo "${name}-${RDMSUFF}"
+}
+
+get_rtname()
+{
+	local rtid="$1"
+
+	get_nodename "rt-${rtid}"
+}
+
+get_hsname()
+{
+	local hsid="$1"
+
+	get_nodename "hs-${hsid}"
+}
+
+__create_namespace()
+{
+	local name="$1"
+
+	ip netns add "${name}"
+}
+
+create_router()
+{
+	local rtid="$1"
+	local nsname
+
+	nsname="$(get_rtname "${rtid}")"
+
+	__create_namespace "${nsname}"
+}
+
+create_host()
+{
+	local hsid="$1"
+	local nsname
+
+	nsname="$(get_hsname "${hsid}")"
+
+	__create_namespace "${nsname}"
+}
+
+cleanup()
+{
+	local nsname
+	local i
+
+	# destroy routers
+	for i in ${ROUTERS}; do
+		nsname="$(get_rtname "${i}")"
+
+		ip netns del "${nsname}" &>/dev/null || true
+	done
+
+	# destroy hosts
+	for i in ${HOSTS}; do
+		nsname="$(get_hsname "${i}")"
+
+		ip netns del "${nsname}" &>/dev/null || true
+	done
+
+	# check whether the setup phase was completed successfully or not. In
+	# case of an error during the setup phase of the testing environment,
+	# the selftest is considered as "skipped".
+	if [ "${SETUP_ERR}" -ne 0 ]; then
+		echo "SKIP: Setting up the testing environment failed"
+		exit "${ksft_skip}"
+	fi
+
+	exit "${ret}"
+}
+
+add_link_rt_pairs()
+{
+	local rt="$1"
+	local rt_neighs="$2"
+	local neigh
+	local nsname
+	local neigh_nsname
+
+	nsname="$(get_rtname "${rt}")"
+
+	for neigh in ${rt_neighs}; do
+		neigh_nsname="$(get_rtname "${neigh}")"
+
+		ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \
+			type veth peer name "veth-rt-${neigh}-${rt}" \
+			netns "${neigh_nsname}"
+	done
+}
+
+get_network_prefix()
+{
+	local rt="$1"
+	local neigh="$2"
+	local p="${rt}"
+	local q="${neigh}"
+
+	if [ "${p}" -gt "${q}" ]; then
+		p="${q}"; q="${rt}"
+	fi
+
+	echo "${IPv6_RT_NETWORK}:${p}:${q}"
+}
+
+# Given the description of a router <id:op> as an input, the function returns
+# the <id> token which represents the ID of the router.
+# i.e. input: "12:psp"
+#      output: "12"
+__get_srv6_rtcfg_id()
+{
+	local element="$1"
+
+	echo "${element}" | cut -d':' -f1
+}
+
+# Given the description of a router <id:op> as an input, the function returns
+# the <op> token which represents the operation (e.g. End behavior with or
+# withouth flavors) configured for the node.
+
+# Note that when the operation represents an End behavior with a list of
+# flavors, the output is the ordered version of that list.
+# i.e. input: "5:usp,psp,usd"
+#      output: "psp,usd,usp"
+__get_srv6_rtcfg_op()
+{
+	local element="$1"
+
+	# return the lexicographically ordered flavors
+	echo "${element}" | cut -d':' -f2 | sed 's/,/\n/g' | sort | \
+		xargs | sed 's/ /,/g'
+}
+
+# Setup the basic networking for the routers
+setup_rt_networking()
+{
+	local rt="$1"
+	local rt_neighs="$2"
+	local nsname
+	local net_prefix
+	local devname
+	local neigh
+
+	nsname="$(get_rtname "${rt}")"
+
+	for neigh in ${rt_neighs}; do
+		devname="veth-rt-${rt}-${neigh}"
+
+		net_prefix="$(get_network_prefix "${rt}" "${neigh}")"
+
+		ip -netns "${nsname}" addr \
+			add "${net_prefix}::${rt}/64" dev "${devname}" nodad
+
+		ip -netns "${nsname}" link set "${devname}" up
+	done
+
+	ip -netns "${nsname}" link set lo up
+
+	ip -netns "${nsname}" link add ${DUMMY_DEVNAME} type dummy
+	ip -netns "${nsname}" link set ${DUMMY_DEVNAME} up
+
+	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
+	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
+	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1
+}
+
+# Setup local SIDs for an SRv6 router
+setup_rt_local_sids()
+{
+	local rt="$1"
+	local rt_neighs="$2"
+	local net_prefix
+	local devname
+	local nsname
+	local neigh
+
+	nsname="$(get_rtname "${rt}")"
+
+	for neigh in ${rt_neighs}; do
+		devname="veth-rt-${rt}-${neigh}"
+
+		net_prefix="$(get_network_prefix "${rt}" "${neigh}")"
+
+		# set underlay network routes for SIDs reachability
+		ip -netns "${nsname}" -6 route \
+			add "${LOCATOR_SERVICE}:${neigh}::/32" \
+			table "${LOCALSID_TABLE_ID}" \
+			via "${net_prefix}::${neigh}" dev "${devname}"
+	done
+
+	# Local End behavior (note that "dev" is a dummy interface chosen for
+	# the sake of simplicity).
+	ip -netns "${nsname}" -6 route \
+		add "${LOCATOR_SERVICE}:${rt}::${END_FUNC}" \
+		table "${LOCALSID_TABLE_ID}" \
+		encap seg6local action End dev "${DUMMY_DEVNAME}"
+
+
+	# all SIDs start with a common locator. Routes and SRv6 Endpoint
+	# behavior instaces are grouped together in the 'localsid' table.
+	ip -netns "${nsname}" -6 rule \
+		add to "${LOCATOR_SERVICE}::/16" \
+		lookup "${LOCALSID_TABLE_ID}" prio 999
+
+	# set default routes to unreachable
+	ip -netns "${nsname}" -6 route \
+		add unreachable default metric 4278198272 \
+		dev "${DUMMY_DEVNAME}"
+}
+
+# This helper function builds and installs the SID List (i.e. SRv6 Policy)
+# to be applied on incoming packets at the ingress node. Moreover, it
+# configures the SRv6 nodes specified in the SID List to process the traffic
+# according to the operations required by the Policy itself.
+# args:
+#  $1 - destination host (i.e. cafe::x host)
+#  $2 - SRv6 router configured for enforcing the SRv6 Policy
+#  $3 - compact way to represent a list of SRv6 routers with their operations
+#       (i.e. behaviors) that each of them needs to perform. Every <nodeid:op>
+#       element constructs a SID that is associated with the behavior <op> on
+#       the <nodeid> node. The list of such elements forms an SRv6 Policy.
+__setup_rt_policy()
+{
+	local dst="$1"
+	local encap_rt="$2"
+	local policy_rts="$3"
+	local behavior_cfg
+	local in_nsname
+	local rt_nsname
+	local policy=''
+	local function
+	local fullsid
+	local op_type
+	local node
+	local n
+
+	in_nsname="$(get_rtname "${encap_rt}")"
+
+	for n in ${policy_rts}; do
+		node="$(__get_srv6_rtcfg_id "${n}")"
+		op_type="$(__get_srv6_rtcfg_op "${n}")"
+		rt_nsname="$(get_rtname "${node}")"
+
+		case "${op_type}" in
+		"noflv")
+			policy="${policy}${LOCATOR_SERVICE}:${node}::${END_FUNC},"
+			function="${END_FUNC}"
+			behavior_cfg="End"
+			;;
+
+		"psp")
+			policy="${policy}${LOCATOR_SERVICE}:${node}::${END_PSP_FUNC},"
+			function="${END_PSP_FUNC}"
+			behavior_cfg="End flavors psp"
+			;;
+
+		*)
+			break
+			;;
+		esac
+
+		fullsid="${LOCATOR_SERVICE}:${node}::${function}"
+
+		# add SRv6 Endpoint behavior to the selected router
+		if ! ip -netns "${rt_nsname}" -6 route get "${fullsid}" \
+			&>/dev/null; then
+			ip -netns "${rt_nsname}" -6 route \
+				add "${fullsid}" \
+				table "${LOCALSID_TABLE_ID}" \
+				encap seg6local action ${behavior_cfg} \
+				dev "${DUMMY_DEVNAME}"
+		fi
+	done
+
+	# we need to remove the trailing comma to avoid inserting an empty
+	# address (::0) in the SID List.
+	policy="${policy%,}"
+
+	# add SRv6 policy to incoming traffic sent by connected hosts
+	ip -netns "${in_nsname}" -6 route \
+		add "${IPv6_HS_NETWORK}::${dst}" \
+		encap seg6 mode inline segs "${policy}" \
+		dev "${DUMMY_DEVNAME}"
+
+	ip -netns "${in_nsname}" -6 neigh \
+		add proxy "${IPv6_HS_NETWORK}::${dst}" \
+		dev "${RT2HS_DEVNAME}"
+}
+
+# see __setup_rt_policy
+setup_rt_policy_ipv6()
+{
+	__setup_rt_policy "$1" "$2" "$3"
+}
+
+setup_hs()
+{
+	local hs="$1"
+	local rt="$2"
+	local hsname
+	local rtname
+
+	hsname="$(get_hsname "${hs}")"
+	rtname="$(get_rtname "${rt}")"
+
+	ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
+	ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+	ip -netns "${hsname}" link add veth0 type veth \
+		peer name "${RT2HS_DEVNAME}" netns "${rtname}"
+
+	ip -netns "${hsname}" addr \
+		add "${IPv6_HS_NETWORK}::${hs}/64" dev veth0 nodad
+
+	ip -netns "${hsname}" link set veth0 up
+	ip -netns "${hsname}" link set lo up
+
+	ip -netns "${rtname}" addr \
+		add "${IPv6_HS_NETWORK}::254/64" dev "${RT2HS_DEVNAME}" nodad
+
+	ip -netns "${rtname}" link set "${RT2HS_DEVNAME}" up
+
+	ip netns exec "${rtname}" \
+		sysctl -wq net.ipv6.conf."${RT2HS_DEVNAME}".proxy_ndp=1
+}
+
+setup()
+{
+	local i
+
+	# create routers
+	ROUTERS="1 2 3 4"; readonly ROUTERS
+	for i in ${ROUTERS}; do
+		create_router "${i}"
+	done
+
+	# create hosts
+	HOSTS="1 2"; readonly HOSTS
+	for i in ${HOSTS}; do
+		create_host "${i}"
+	done
+
+	# set up the links for connecting routers
+	add_link_rt_pairs 1 "2 3 4"
+	add_link_rt_pairs 2 "3 4"
+	add_link_rt_pairs 3 "4"
+
+	# set up the basic connectivity of routers and routes required for
+	# reachability of SIDs.
+	setup_rt_networking 1 "2 3 4"
+	setup_rt_networking 2 "1 3 4"
+	setup_rt_networking 3 "1 2 4"
+	setup_rt_networking 4 "1 2 3"
+
+	# set up the hosts connected to routers
+	setup_hs 1 1
+	setup_hs 2 2
+
+	# set up default SRv6 Endpoints (i.e. SRv6 End behavior)
+	setup_rt_local_sids 1 "2 3 4"
+	setup_rt_local_sids 2 "1 3 4"
+	setup_rt_local_sids 3 "1 2 4"
+	setup_rt_local_sids 4 "1 2 3"
+
+	# set up SRv6 policies
+	# create a connection between hosts hs-1 and hs-2.
+	# The path between hs-1 and hs-2 traverses SRv6 aware routers.
+	# For each direction two path are chosen:
+	#
+	# Direction hs-1 -> hs-2 (PSP flavor)
+	#  - rt-1 (SRv6 H.Insert policy)
+	#  - rt-3 (SRv6 End behavior)
+	#  - rt-4 (SRv6 End flavor PSP with SL>1, acting as End behavior)
+	#  - rt-2 (SRv6 End flavor PSP with SL=1)
+	#
+	# Direction hs-2 -> hs-1 (PSP flavor)
+	#  - rt-2 (SRv6 H.Insert policy)
+	#  - rt-1 (SRv6 End flavor PSP with SL=1)
+	setup_rt_policy_ipv6 2 1 "3:noflv 4:psp 2:psp"
+	setup_rt_policy_ipv6 1 2 "1:psp"
+
+	# testing environment was set up successfully
+	SETUP_ERR=0
+}
+
+check_rt_connectivity()
+{
+	local rtsrc="$1"
+	local rtdst="$2"
+	local prefix
+	local rtsrc_nsname
+
+	rtsrc_nsname="$(get_rtname "${rtsrc}")"
+
+	prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")"
+
+	ip netns exec "${rtsrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
+		"${prefix}::${rtdst}" >/dev/null 2>&1
+}
+
+check_and_log_rt_connectivity()
+{
+	local rtsrc="$1"
+	local rtdst="$2"
+
+	check_rt_connectivity "${rtsrc}" "${rtdst}"
+	log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}"
+}
+
+check_hs_ipv6_connectivity()
+{
+	local hssrc="$1"
+	local hsdst="$2"
+	local hssrc_nsname
+
+	hssrc_nsname="$(get_hsname "${hssrc}")"
+
+	ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
+		"${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1
+}
+
+check_and_log_hs2gw_connectivity()
+{
+	local hssrc="$1"
+
+	check_hs_ipv6_connectivity "${hssrc}" 254
+	log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> gw"
+}
+
+check_and_log_hs_ipv6_connectivity()
+{
+	local hssrc="$1"
+	local hsdst="$2"
+
+	check_hs_ipv6_connectivity "${hssrc}" "${hsdst}"
+	log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}"
+}
+
+check_and_log_hs_connectivity()
+{
+	local hssrc="$1"
+	local hsdst="$2"
+
+	check_and_log_hs_ipv6_connectivity "${hssrc}" "${hsdst}"
+}
+
+router_tests()
+{
+	local i
+	local j
+
+	log_section "IPv6 routers connectivity test"
+
+	for i in ${ROUTERS}; do
+		for j in ${ROUTERS}; do
+			if [ "${i}" -eq "${j}" ]; then
+				continue
+			fi
+
+			check_and_log_rt_connectivity "${i}" "${j}"
+		done
+	done
+}
+
+host2gateway_tests()
+{
+	local hs
+
+	log_section "IPv6 connectivity test among hosts and gateways"
+
+	for hs in ${HOSTS}; do
+		check_and_log_hs2gw_connectivity "${hs}"
+	done
+}
+
+host_srv6_end_flv_psp_tests()
+{
+	log_section "SRv6 connectivity test hosts (h1 <-> h2, PSP flavor)"
+
+	check_and_log_hs_connectivity 1 2
+	check_and_log_hs_connectivity 2 1
+}
+
+test_iproute2_supp_or_ksft_skip()
+{
+	local flavor="$1"
+
+	if ! ip route help 2>&1 | grep -qo "${flavor}"; then
+		echo "SKIP: Missing SRv6 ${flavor} flavor support in iproute2"
+		exit "${ksft_skip}"
+	fi
+}
+
+test_kernel_supp_or_ksft_skip()
+{
+	local flavor="$1"
+	local test_netns
+
+	test_netns="kflv-$(mktemp -u XXXXXXXX)"
+
+	if ! ip netns add "${test_netns}"; then
+		echo "SKIP: Cannot set up netns to test kernel support for flavors"
+		exit "${ksft_skip}"
+	fi
+
+	if ! ip -netns "${test_netns}" link \
+		add "${DUMMY_DEVNAME}" type dummy; then
+		echo "SKIP: Cannot set up dummy dev to test kernel support for flavors"
+
+		ip netns del "${test_netns}"
+		exit "${ksft_skip}"
+	fi
+
+	if ! ip -netns "${test_netns}" link \
+		set "${DUMMY_DEVNAME}" up; then
+		echo "SKIP: Cannot activate dummy dev to test kernel support for flavors"
+
+		ip netns del "${test_netns}"
+		exit "${ksft_skip}"
+	fi
+
+	if ! ip -netns "${test_netns}" -6 route \
+		add "${IPv6_TESTS_ADDR}" encap seg6local \
+		action End flavors "${flavor}" dev "${DUMMY_DEVNAME}"; then
+		echo "SKIP: ${flavor} flavor not supported in kernel"
+
+		ip netns del "${test_netns}"
+		exit "${ksft_skip}"
+	fi
+
+	ip netns del "${test_netns}"
+}
+
+test_dummy_dev_or_ksft_skip()
+{
+	local test_netns
+
+	test_netns="dummy-$(mktemp -u XXXXXXXX)"
+
+	if ! ip netns add "${test_netns}"; then
+		echo "SKIP: Cannot set up netns for testing dummy dev support"
+		exit "${ksft_skip}"
+	fi
+
+	modprobe dummy &>/dev/null || true
+	if ! ip -netns "${test_netns}" link \
+		add "${DUMMY_DEVNAME}" type dummy; then
+		echo "SKIP: dummy dev not supported"
+
+		ip netns del "${test_netns}"
+		exit "${ksft_skip}"
+	fi
+
+	ip netns del "${test_netns}"
+}
+
+if [ "$(id -u)" -ne 0 ]; then
+	echo "SKIP: Need root privileges"
+	exit "${ksft_skip}"
+fi
+
+# required programs to carry out this selftest
+test_command_or_ksft_skip ip
+test_command_or_ksft_skip ping
+test_command_or_ksft_skip sysctl
+test_command_or_ksft_skip grep
+test_command_or_ksft_skip cut
+test_command_or_ksft_skip sed
+test_command_or_ksft_skip sort
+test_command_or_ksft_skip xargs
+
+test_dummy_dev_or_ksft_skip
+test_iproute2_supp_or_ksft_skip psp
+test_kernel_supp_or_ksft_skip psp
+
+set -e
+trap cleanup EXIT
+
+setup
+set +e
+
+router_tests
+host2gateway_tests
+host_srv6_end_flv_psp_tests
+
+print_log_test_results
diff --git a/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh
new file mode 100755
index 000000000000..87e414cc417c
--- /dev/null
+++ b/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh
@@ -0,0 +1,1145 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# author: Andrea Mayer <andrea.mayer@uniroma2.it>
+#
+# This script is designed for testing the support of NEXT-C-SID flavor for SRv6
+# End behavior.
+# A basic knowledge of SRv6 architecture [1] and of the compressed SID approach
+# [2] is assumed for the reader.
+#
+# The network topology used in the selftest is depicted hereafter, composed by
+# two hosts and four routers. Hosts hs-1 and hs-2 are connected through an
+# IPv4/IPv6 L3 VPN service, offered by routers rt-1, rt-2, rt-3 and rt-4 using
+# the NEXT-C-SID flavor. The key components for such VPNs are:
+#
+#    i) The SRv6 H.Encaps/H.Encaps.Red behaviors [1] apply SRv6 Policies on
+#       traffic received by connected hosts, initiating the VPN tunnel;
+#
+#   ii) The SRv6 End behavior [1] advances the active SID in the SID List
+#       carried by the SRH;
+#
+#  iii) The NEXT-C-SID mechanism [2] offers the possibility of encoding several
+#       SRv6 segments within a single 128-bit SID address, referred to as a
+#       Compressed SID (C-SID) container. In this way, the length of the SID
+#       List can be drastically reduced.
+#       The NEXT-C-SID is provided as a "flavor" of the SRv6 End behavior
+#       which advances the current C-SID (i.e. the Locator-Node Function defined
+#       in [2]) with the next one carried in the Argument, if available.
+#       When no more C-SIDs are available in the Argument, the SRv6 End behavior
+#       will apply the End function selecting the next SID in the SID List.
+#
+#   iv) The SRv6 End.DT46 behavior [1] is used for removing the SRv6 Policy and,
+#       thus, it terminates the VPN tunnel. Such a behavior is capable of
+#       handling, at the same time, both tunneled IPv4 and IPv6 traffic.
+#
+# [1] https://datatracker.ietf.org/doc/html/rfc8986
+# [2] https://datatracker.ietf.org/doc/html/draft-ietf-spring-srv6-srh-compression
+#
+#
+#               cafe::1                      cafe::2
+#              10.0.0.1                     10.0.0.2
+#             +--------+                   +--------+
+#             |        |                   |        |
+#             |  hs-1  |                   |  hs-2  |
+#             |        |                   |        |
+#             +---+----+                   +----+---+
+#    cafe::/64    |                             |      cafe::/64
+#  10.0.0.0/24    |                             |    10.0.0.0/24
+#             +---+----+                   +----+---+
+#             |        |  fcf0:0:1:2::/64  |        |
+#             |  rt-1  +-------------------+  rt-2  |
+#             |        |                   |        |
+#             +---+----+                   +----+---+
+#                 |      .               .      |
+#                 |  fcf0:0:1:3::/64   .        |
+#                 |          .       .          |
+#                 |            .   .            |
+# fcf0:0:1:4::/64 |              .              | fcf0:0:2:3::/64
+#                 |            .   .            |
+#                 |          .       .          |
+#                 |  fcf0:0:2:4::/64   .        |
+#                 |      .               .      |
+#             +---+----+                   +----+---+
+#             |        |                   |        |
+#             |  rt-4  +-------------------+  rt-3  |
+#             |        |  fcf0:0:3:4::/64  |        |
+#             +---+----+                   +----+---+
+#
+# Every fcf0:0:x:y::/64 network interconnects the SRv6 routers rt-x with rt-y in
+# the selftest network.
+#
+# Local SID/C-SID table
+# =====================
+#
+# Each SRv6 router is configured with a Local SID/C-SID table in which
+# SIDs/C-SIDs are stored. Considering an SRv6 router rt-x, SIDs/C-SIDs are
+# configured in the Local SID/C-SIDs table as follows:
+#
+#   Local SID/C-SID table for SRv6 router rt-x
+#   +-----------------------------------------------------------+
+#   |fcff:x::d46 is associated with the non-compressed SRv6     |
+#   |   End.DT46 behavior                                       |
+#   +-----------------------------------------------------------+
+#   |fcbb:0:0x00::/48 is associated with the NEXT-C-SID flavor  |
+#   |   of SRv6 End behavior                                    |
+#   +-----------------------------------------------------------+
+#   |fcbb:0:0x00:d46::/64 is associated with the SRv6 End.DT46  |
+#   |   behavior when NEXT-C-SID compression is turned on       |
+#   +-----------------------------------------------------------+
+#
+# The fcff::/16 prefix is reserved for implementing SRv6 services with regular
+# (non compressed) SIDs. Reachability of SIDs is ensured by proper configuration
+# of the IPv6 routing tables in the routers.
+# Similarly, the fcbb:0::/32 prefix is reserved for implementing SRv6 VPN
+# services leveraging the NEXT-C-SID compression mechanism. Indeed, the
+# fcbb:0::/32 is used for encoding the Locator-Block while the Locator-Node
+# Function is encoded with 16 bits.
+#
+# Incoming traffic classification and application of SRv6 Policies
+# ================================================================
+#
+# An SRv6 ingress router applies different SRv6 Policies to the traffic received
+# from a connected host, considering the IPv4 or IPv6 destination address.
+# SRv6 policy enforcement consists of encapsulating the received traffic into a
+# new IPv6 packet with a given SID List contained in the SRH.
+# When the SID List contains only one SID, the SRH could be omitted completely
+# and that SID is stored directly in the IPv6 Destination Address (DA) (this is
+# called "reduced" encapsulation).
+#
+# Test cases for NEXT-C-SID
+# =========================
+#
+# We consider two test cases for NEXT-C-SID: i) single SID and ii) double SID.
+#
+# In the single SID test case we have a number of segments that are all
+# contained in a single Compressed SID (C-SID) container. Therefore the
+# resulting SID List has only one SID. Using the reduced encapsulation format
+# this will result in a packet with no SRH.
+#
+# In the double SID test case we have one segment carried in a Compressed SID
+# (C-SID) container, followed by a regular (non compressed) SID. The resulting
+# SID List has two segments and it is possible to test the advance to the next
+# SID when all the C-SIDs in a C-SID container have been processed. Using the
+# reduced encapsulation format this will result in a packet with an SRH
+# containing 1 segment.
+#
+# For the single SID test case, we use the IPv4 addresses of hs-1 and hs-2, for
+# the double SID test case, we use their IPv6 addresses. This is only done to
+# simplify the test setup and avoid adding other hosts or multiple addresses on
+# the same interface of a host.
+#
+# Traffic from hs-1 to hs-2
+# -------------------------
+#
+# Packets generated from hs-1 and directed towards hs-2 are handled by rt-1
+# which applies the SRv6 Policies as follows:
+#
+#   i) IPv6 DA=cafe::2, H.Encaps.Red with SID List=fcbb:0:0400:0300:0200:d46::
+#  ii) IPv4 DA=10.0.0.2, H.Encaps.Red with SID List=fcbb:0:0300::,fcff:2::d46
+#
+# ### i) single SID
+#
+# The router rt-1 is configured to enforce the given Policy through the SRv6
+# H.Encaps.Red behavior which avoids the presence of the SRH at all, since it
+# pushes the single SID directly in the IPv6 DA. Such a SID encodes a whole
+# C-SID container carrying several C-SIDs (e.g. 0400, 0300, etc).
+#
+# As the packet reaches the router rt-4, the enabled NEXT-C-SID SRv6 End
+# behavior (associated with fcbb:0:0400::/48) is triggered. This behavior
+# analyzes the IPv6 DA and checks whether the Argument of the C-SID container
+# is zero or not. In this case, the Argument is *NOT* zero and the IPv6 DA is
+# updated as follows:
+#
+# +---------------------------------------------------------------+
+# | Before applying the rt-4 enabled NEXT-C-SID SRv6 End behavior |
+# +---------------------------------------------------------------+
+# |                            +---------- Argument               |
+# |                     vvvvvvvvvvvvvvvv                          |
+# | IPv6 DA fcbb:0:0400:0300:0200:d46::                           |
+# |                ^^^^    <-- shifting                           |
+# |                  |                                            |
+# |          Locator-Node Function                                |
+# +---------------------------------------------------------------+
+# | After applying the rt-4 enabled NEXT-C-SID SRv6 End behavior  |
+# +---------------------------------------------------------------+
+# |                          +---------- Argument                 |
+# |                    vvvvvvvvvvvv                               |
+# | IPv6 DA fcbb:0:0300:0200:d46::                                |
+# |                ^^^^                                           |
+# |                  |                                            |
+# |          Locator-Node Function                                |
+# +---------------------------------------------------------------+
+#
+# After having applied the enabled NEXT-C-SID SRv6 End behavior, the packet is
+# sent to the next node, i.e. rt-3.
+#
+# The enabled NEXT-C-SID SRv6 End behavior on rt-3 is executed as the packet is
+# received. This behavior processes the packet and updates the IPv6 DA with
+# fcbb:0:0200:d46::, since the Argument is *NOT* zero. Then, the packet is sent
+# to the router rt-2.
+#
+# The router rt-2 is configured for decapsulating the inner IPv6 packet and,
+# for this reason, it applies the SRv6 End.DT46 behavior on the received
+# packet. It is worth noting that the SRv6 End.DT46 behavior does not require
+# the presence of the SRH: it is fully capable to operate properly on
+# IPv4/IPv6-in-IPv6 encapsulations.
+# At the end of the decap operation, the packet is sent to the
+# host hs-2.
+#
+# ### ii) double SID
+#
+# The router rt-1 is configured to enforce the given Policy through the SRv6
+# H.Encaps.Red. As a result, the first SID fcbb:0:0300:: is stored into the
+# IPv6 DA, while the SRH pushed into the packet is made of only one SID, i.e.
+# fcff:2::d46. Hence, the packet sent by hs-1 to hs-2 is encapsulated in an
+# outer IPv6 header plus the SRH.
+#
+# As the packet reaches the node rt-3, the router applies the enabled NEXT-C-SID
+# SRv6 End behavior.
+#
+# +---------------------------------------------------------------+
+# | Before applying the rt-3 enabled NEXT-C-SID SRv6 End behavior |
+# +---------------------------------------------------------------+
+# |                            +---------- Argument               |
+# |                      vvvv (Argument is all filled with zeros) |
+# | IPv6 DA fcbb:0:0300::                                         |
+# |                ^^^^                                           |
+# |                  |                                            |
+# |          Locator-Node Function                                |
+# +---------------------------------------------------------------+
+# | After applying the rt-3 enabled NEXT-C-SID SRv6 End behavior  |
+# +---------------------------------------------------------------+
+# |                                                               |
+# | IPv6 DA fcff:2::d46                                           |
+# |         ^^^^^^^^^^^                                           |
+# |              |                                                |
+# |        SID copied from the SID List contained in the SRH      |
+# +---------------------------------------------------------------+
+#
+# Since the Argument of the C-SID container is zero, the behavior can not
+# update the Locator-Node function with the next C-SID carried in the Argument
+# itself. Thus, the enabled NEXT-C-SID SRv6 End behavior operates as the
+# traditional End behavior: it updates the IPv6 DA by copying the next
+# available SID in the SID List carried by the SRH. After that, the packet is
+# sent to the node rt-2.
+#
+# Once the packet is received by rt-2, the router decapsulates the inner IPv6
+# packet using the SRv6 End.DT46 behavior (associated with the SID fcff:2::d46)
+# and sends it to the host hs-2.
+#
+# Traffic from hs-2 to hs-1
+# -------------------------
+#
+# Packets generated from hs-2 and directed towards hs-1 are handled by rt-2
+# which applies the SRv6 Policies as follows:
+#
+#   i) IPv6 DA=cafe::1, SID List=fcbb:0:0300:0400:0100:d46::
+#  ii) IPv4 DA=10.0.0.1, SID List=fcbb:0:0300::,fcff:1::d46
+#
+# For simplicity, such SRv6 Policies were chosen so that, in both use cases (i)
+# and (ii), the network paths crossed by traffic from hs-2 to hs-1 are the same
+# as those taken by traffic from hs-1 to hs-2.
+# In this way, traffic from hs-2 to hs-1 is processed similarly to traffic from
+# hs-1 to hs-2. So, the traffic processing scheme turns out to be the same as
+# that adopted in the use cases already examined (of course, it is necessary to
+# consider the different SIDs/C-SIDs).
+
+# Kselftest framework requirement - SKIP code is 4.
+readonly ksft_skip=4
+
+readonly RDMSUFF="$(mktemp -u XXXXXXXX)"
+readonly DUMMY_DEVNAME="dum0"
+readonly VRF_TID=100
+readonly VRF_DEVNAME="vrf-${VRF_TID}"
+readonly RT2HS_DEVNAME="veth-t${VRF_TID}"
+readonly LOCALSID_TABLE_ID=90
+readonly IPv6_RT_NETWORK=fcf0:0
+readonly IPv6_HS_NETWORK=cafe
+readonly IPv4_HS_NETWORK=10.0.0
+readonly VPN_LOCATOR_SERVICE=fcff
+readonly DT46_FUNC=0d46
+readonly HEADEND_ENCAP="encap.red"
+
+# do not add ':' as separator
+readonly LCBLOCK_ADDR=fcbb0000
+readonly LCBLOCK_BLEN=32
+# do not add ':' as separator
+readonly LCNODEFUNC_FMT="0%d00"
+readonly LCNODEFUNC_BLEN=16
+
+readonly LCBLOCK_NODEFUNC_BLEN=$((LCBLOCK_BLEN + LCNODEFUNC_BLEN))
+
+readonly CSID_CNTR_PREFIX="dead:beaf::/32"
+# ID of the router used for testing the C-SID container cfgs
+readonly CSID_CNTR_RT_ID_TEST=1
+# Routing table used for testing the C-SID container cfgs
+readonly CSID_CNTR_RT_TABLE=91
+
+# C-SID container configurations to be tested
+#
+# An entry of the array is defined as "a,b,c" where:
+# - 'a' and 'b' elements represent respectively the Locator-Block length
+#   (lblen) in bits and the Locator-Node Function length (nflen) in bits.
+#   'a' and 'b' can be set to default values using the placeholder "d" which
+#   indicates the default kernel values (32 for lblen and 16 for nflen);
+#   otherwise, any numeric value is accepted;
+# - 'c' indicates whether the C-SID configuration provided by the values 'a'
+#   and 'b' should be considered valid ("y") or invalid ("n").
+declare -ra CSID_CONTAINER_CFGS=(
+	"d,d,y"
+	"d,16,y"
+	"16,d,y"
+	"16,32,y"
+	"32,16,y"
+	"48,8,y"
+	"8,48,y"
+	"d,0,n"
+	"0,d,n"
+	"32,0,n"
+	"0,32,n"
+	"17,d,n"
+	"d,17,n"
+	"120,16,n"
+	"16,120,n"
+	"0,128,n"
+	"128,0,n"
+	"130,0,n"
+	"0,130,n"
+	"0,0,n"
+)
+
+PING_TIMEOUT_SEC=4
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+# IDs of routers and hosts are initialized during the setup of the testing
+# network
+ROUTERS=''
+HOSTS=''
+
+SETUP_ERR=1
+
+ret=${ksft_skip}
+nsuccess=0
+nfail=0
+
+log_test()
+{
+	local rc="$1"
+	local expected="$2"
+	local msg="$3"
+
+	if [ "${rc}" -eq "${expected}" ]; then
+		nsuccess=$((nsuccess+1))
+		printf "\n    TEST: %-60s  [ OK ]\n" "${msg}"
+	else
+		ret=1
+		nfail=$((nfail+1))
+		printf "\n    TEST: %-60s  [FAIL]\n" "${msg}"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+			echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	fi
+}
+
+print_log_test_results()
+{
+	printf "\nTests passed: %3d\n" "${nsuccess}"
+	printf "Tests failed: %3d\n"   "${nfail}"
+
+	# when a test fails, the value of 'ret' is set to 1 (error code).
+	# Conversely, when all tests are passed successfully, the 'ret' value
+	# is set to 0 (success code).
+	if [ "${ret}" -ne 1 ]; then
+		ret=0
+	fi
+}
+
+log_section()
+{
+	echo
+	echo "################################################################################"
+	echo "TEST SECTION: $*"
+	echo "################################################################################"
+}
+
+test_command_or_ksft_skip()
+{
+	local cmd="$1"
+
+	if [ ! -x "$(command -v "${cmd}")" ]; then
+		echo "SKIP: Could not run test without \"${cmd}\" tool";
+		exit "${ksft_skip}"
+	fi
+}
+
+get_nodename()
+{
+	local name="$1"
+
+	echo "${name}-${RDMSUFF}"
+}
+
+get_rtname()
+{
+	local rtid="$1"
+
+	get_nodename "rt-${rtid}"
+}
+
+get_hsname()
+{
+	local hsid="$1"
+
+	get_nodename "hs-${hsid}"
+}
+
+__create_namespace()
+{
+	local name="$1"
+
+	ip netns add "${name}"
+}
+
+create_router()
+{
+	local rtid="$1"
+	local nsname
+
+	nsname="$(get_rtname "${rtid}")"
+
+	__create_namespace "${nsname}"
+}
+
+create_host()
+{
+	local hsid="$1"
+	local nsname
+
+	nsname="$(get_hsname "${hsid}")"
+
+	__create_namespace "${nsname}"
+}
+
+cleanup()
+{
+	local nsname
+	local i
+
+	# destroy routers
+	for i in ${ROUTERS}; do
+		nsname="$(get_rtname "${i}")"
+
+		ip netns del "${nsname}" &>/dev/null || true
+	done
+
+	# destroy hosts
+	for i in ${HOSTS}; do
+		nsname="$(get_hsname "${i}")"
+
+		ip netns del "${nsname}" &>/dev/null || true
+	done
+
+	# check whether the setup phase was completed successfully or not. In
+	# case of an error during the setup phase of the testing environment,
+	# the selftest is considered as "skipped".
+	if [ "${SETUP_ERR}" -ne 0 ]; then
+		echo "SKIP: Setting up the testing environment failed"
+		exit "${ksft_skip}"
+	fi
+
+	exit "${ret}"
+}
+
+add_link_rt_pairs()
+{
+	local rt="$1"
+	local rt_neighs="$2"
+	local neigh
+	local nsname
+	local neigh_nsname
+
+	nsname="$(get_rtname "${rt}")"
+
+	for neigh in ${rt_neighs}; do
+		neigh_nsname="$(get_rtname "${neigh}")"
+
+		ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \
+			type veth peer name "veth-rt-${neigh}-${rt}" \
+			netns "${neigh_nsname}"
+	done
+}
+
+get_network_prefix()
+{
+	local rt="$1"
+	local neigh="$2"
+	local p="${rt}"
+	local q="${neigh}"
+
+	if [ "${p}" -gt "${q}" ]; then
+		p="${q}"; q="${rt}"
+	fi
+
+	echo "${IPv6_RT_NETWORK}:${p}:${q}"
+}
+
+# Setup the basic networking for the routers
+setup_rt_networking()
+{
+	local rt="$1"
+	local rt_neighs="$2"
+	local nsname
+	local net_prefix
+	local devname
+	local neigh
+
+	nsname="$(get_rtname "${rt}")"
+
+	for neigh in ${rt_neighs}; do
+		devname="veth-rt-${rt}-${neigh}"
+
+		net_prefix="$(get_network_prefix "${rt}" "${neigh}")"
+
+		ip -netns "${nsname}" addr \
+			add "${net_prefix}::${rt}/64" dev "${devname}" nodad
+
+		ip -netns "${nsname}" link set "${devname}" up
+	done
+
+        ip -netns "${nsname}" link add "${DUMMY_DEVNAME}" type dummy
+
+        ip -netns "${nsname}" link set "${DUMMY_DEVNAME}" up
+	ip -netns "${nsname}" link set lo up
+
+	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
+	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
+	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1
+
+	ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0
+	ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0
+	ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1
+}
+
+# build an ipv6 prefix/address based on the input string
+# Note that the input string does not contain ':' and '::' which are considered
+# to be implicit.
+# e.g.:
+#  - input:  fbcc00000400300
+#  - output: fbcc:0000:0400:0300:0000:0000:0000:0000
+#                                ^^^^^^^^^^^^^^^^^^^
+#                              fill the address with 0s
+build_ipv6_addr()
+{
+	local addr="$1"
+	local out=""
+	local strlen="${#addr}"
+	local padn
+	local i
+
+	# add ":" every 4 digits (16 bits)
+	for (( i = 0; i < strlen; i++ )); do
+		if (( i > 0 && i < 32 && (i % 4) == 0 )); then
+			out="${out}:"
+		fi
+
+		out="${out}${addr:$i:1}"
+	done
+
+	# fill the remaining bits of the address with 0s
+	padn=$((32 - strlen))
+	for (( i = padn; i > 0; i-- )); do
+		if (( i > 0 && i < 32 && (i % 4) == 0 )); then
+			out="${out}:"
+		fi
+
+		out="${out}0"
+	done
+
+	printf "${out}"
+}
+
+build_csid()
+{
+	local nodeid="$1"
+
+	printf "${LCNODEFUNC_FMT}" "${nodeid}"
+}
+
+build_lcnode_func_prefix()
+{
+	local nodeid="$1"
+	local lcnodefunc
+	local prefix
+	local out
+
+	lcnodefunc="$(build_csid "${nodeid}")"
+	prefix="$(build_ipv6_addr "${LCBLOCK_ADDR}${lcnodefunc}")"
+
+	out="${prefix}/${LCBLOCK_NODEFUNC_BLEN}"
+
+	echo "${out}"
+}
+
+# Setup local SIDs for an SRv6 router
+setup_rt_local_sids()
+{
+	local rt="$1"
+	local rt_neighs="$2"
+	local net_prefix
+	local devname
+	local nsname
+	local neigh
+	local lcnode_func_prefix
+	local lcblock_prefix
+
+	nsname="$(get_rtname "${rt}")"
+
+	for neigh in ${rt_neighs}; do
+		devname="veth-rt-${rt}-${neigh}"
+
+		net_prefix="$(get_network_prefix "${rt}" "${neigh}")"
+
+		# set underlay network routes for SIDs reachability
+		ip -netns "${nsname}" -6 route \
+			add "${VPN_LOCATOR_SERVICE}:${neigh}::/32" \
+			table "${LOCALSID_TABLE_ID}" \
+			via "${net_prefix}::${neigh}" dev "${devname}"
+
+		# set the underlay network for C-SIDs reachability
+		lcnode_func_prefix="$(build_lcnode_func_prefix "${neigh}")"
+
+		ip -netns "${nsname}" -6 route \
+			add "${lcnode_func_prefix}" \
+			table "${LOCALSID_TABLE_ID}" \
+			via "${net_prefix}::${neigh}" dev "${devname}"
+	done
+
+	lcnode_func_prefix="$(build_lcnode_func_prefix "${rt}")"
+
+	# enabled NEXT-C-SID SRv6 End behavior (note that "dev" is the dummy
+	# dum0 device chosen for the sake of simplicity).
+	ip -netns "${nsname}" -6 route \
+		add "${lcnode_func_prefix}" \
+		table "${LOCALSID_TABLE_ID}" \
+		encap seg6local action End flavors next-csid \
+		lblen "${LCBLOCK_BLEN}" nflen "${LCNODEFUNC_BLEN}" \
+		dev "${DUMMY_DEVNAME}"
+
+	# all SIDs for VPNs start with a common locator. Routes and SRv6
+	# Endpoint behavior instaces are grouped together in the 'localsid'
+	# table.
+	ip -netns "${nsname}" -6 rule \
+		add to "${VPN_LOCATOR_SERVICE}::/16" \
+		lookup "${LOCALSID_TABLE_ID}" prio 999
+
+	# common locator block for NEXT-C-SIDS compression mechanism.
+	lcblock_prefix="$(build_ipv6_addr "${LCBLOCK_ADDR}")"
+	ip -netns "${nsname}" -6 rule \
+		add to "${lcblock_prefix}/${LCBLOCK_BLEN}" \
+		lookup "${LOCALSID_TABLE_ID}" prio 999
+}
+
+# build and install the SRv6 policy into the ingress SRv6 router as well as the
+# decap SID in the egress one.
+# args:
+#  $1 - src host (evaluate automatically the ingress router)
+#  $2 - dst host (evaluate automatically the egress router)
+#  $3 - SRv6 routers configured for steering traffic (End behaviors)
+#  $4 - single SID or double SID
+#  $5 - traffic type (IPv6 or IPv4)
+__setup_l3vpn()
+{
+	local src="$1"
+	local dst="$2"
+	local end_rts="$3"
+	local mode="$4"
+	local traffic="$5"
+	local nsname
+	local policy
+	local container
+	local decapsid
+	local lcnfunc
+	local dt
+	local n
+	local rtsrc_nsname
+	local rtdst_nsname
+
+	rtsrc_nsname="$(get_rtname "${src}")"
+	rtdst_nsname="$(get_rtname "${dst}")"
+
+	container="${LCBLOCK_ADDR}"
+
+	# build first SID (C-SID container)
+	for n in ${end_rts}; do
+		lcnfunc="$(build_csid "${n}")"
+
+		container="${container}${lcnfunc}"
+	done
+
+	if [ "${mode}" -eq 1 ]; then
+		# single SID policy
+		dt="$(build_csid "${dst}")${DT46_FUNC}"
+		container="${container}${dt}"
+		# build the full ipv6 address for the container
+		policy="$(build_ipv6_addr "${container}")"
+
+		# build the decap SID used in the decap node
+		container="${LCBLOCK_ADDR}${dt}"
+		decapsid="$(build_ipv6_addr "${container}")"
+	else
+		# double SID policy
+		decapsid="${VPN_LOCATOR_SERVICE}:${dst}::${DT46_FUNC}"
+
+		policy="$(build_ipv6_addr "${container}"),${decapsid}"
+	fi
+
+	# apply encap policy
+	if [ "${traffic}" -eq 6 ]; then
+		ip -netns "${rtsrc_nsname}" -6 route \
+			add "${IPv6_HS_NETWORK}::${dst}" vrf "${VRF_DEVNAME}" \
+			encap seg6 mode "${HEADEND_ENCAP}" segs "${policy}" \
+			dev "${VRF_DEVNAME}"
+
+		ip -netns "${rtsrc_nsname}" -6 neigh \
+			add proxy "${IPv6_HS_NETWORK}::${dst}" \
+			dev "${RT2HS_DEVNAME}"
+	else
+		# "dev" must be different from the one where the packet is
+		# received, otherwise the proxy arp does not work.
+		ip -netns "${rtsrc_nsname}" -4 route \
+			add "${IPv4_HS_NETWORK}.${dst}" vrf "${VRF_DEVNAME}" \
+			encap seg6 mode "${HEADEND_ENCAP}" segs "${policy}" \
+			dev "${VRF_DEVNAME}"
+	fi
+
+	# apply decap
+	# Local End.DT46 behavior (decap)
+	ip -netns "${rtdst_nsname}" -6 route \
+		add "${decapsid}" \
+		table "${LOCALSID_TABLE_ID}" \
+		encap seg6local action End.DT46 vrftable "${VRF_TID}" \
+		dev "${VRF_DEVNAME}"
+}
+
+# see __setup_l3vpn()
+setup_ipv4_vpn_2sids()
+{
+	__setup_l3vpn "$1" "$2" "$3" 2 4
+}
+
+# see __setup_l3vpn()
+setup_ipv6_vpn_1sid()
+{
+	__setup_l3vpn "$1" "$2" "$3" 1 6
+}
+
+setup_hs()
+{
+	local hs="$1"
+	local rt="$2"
+	local hsname
+	local rtname
+
+	hsname="$(get_hsname "${hs}")"
+	rtname="$(get_rtname "${rt}")"
+
+	ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
+	ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+	ip -netns "${hsname}" link add veth0 type veth \
+		peer name "${RT2HS_DEVNAME}" netns "${rtname}"
+
+	ip -netns "${hsname}" addr \
+		add "${IPv6_HS_NETWORK}::${hs}/64" dev veth0 nodad
+	ip -netns "${hsname}" addr add "${IPv4_HS_NETWORK}.${hs}/24" dev veth0
+
+	ip -netns "${hsname}" link set veth0 up
+	ip -netns "${hsname}" link set lo up
+
+	# configure the VRF on the router which is directly connected to the
+	# source host.
+	ip -netns "${rtname}" link \
+		add "${VRF_DEVNAME}" type vrf table "${VRF_TID}"
+	ip -netns "${rtname}" link set "${VRF_DEVNAME}" up
+
+	# enslave the veth interface connecting the router with the host to the
+	# VRF in the access router
+	ip -netns "${rtname}" link \
+		set "${RT2HS_DEVNAME}" master "${VRF_DEVNAME}"
+
+	# set default routes to unreachable for both ipv6 and ipv4
+	ip -netns "${rtname}" -6 route \
+		add unreachable default metric 4278198272 \
+		vrf "${VRF_DEVNAME}"
+	ip -netns "${rtname}" -4 route \
+		add unreachable default metric 4278198272 \
+		vrf "${VRF_DEVNAME}"
+
+	ip -netns "${rtname}" addr \
+		add "${IPv6_HS_NETWORK}::254/64" dev "${RT2HS_DEVNAME}" nodad
+	ip -netns "${rtname}" addr \
+		add "${IPv4_HS_NETWORK}.254/24" dev "${RT2HS_DEVNAME}"
+
+	ip -netns "${rtname}" link set "${RT2HS_DEVNAME}" up
+
+	ip netns exec "${rtname}" \
+		sysctl -wq net.ipv6.conf."${RT2HS_DEVNAME}".proxy_ndp=1
+	ip netns exec "${rtname}" \
+		sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1
+
+	# disable the rp_filter otherwise the kernel gets confused about how
+	# to route decap ipv4 packets.
+	ip netns exec "${rtname}" \
+		sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0
+
+	ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
+}
+
+setup()
+{
+	local i
+
+	# create routers
+	ROUTERS="1 2 3 4"; readonly ROUTERS
+	for i in ${ROUTERS}; do
+		create_router "${i}"
+	done
+
+	# create hosts
+	HOSTS="1 2"; readonly HOSTS
+	for i in ${HOSTS}; do
+		create_host "${i}"
+	done
+
+	# set up the links for connecting routers
+	add_link_rt_pairs 1 "2 3 4"
+	add_link_rt_pairs 2 "3 4"
+	add_link_rt_pairs 3 "4"
+
+	# set up the basic connectivity of routers and routes required for
+	# reachability of SIDs.
+	setup_rt_networking 1 "2 3 4"
+	setup_rt_networking 2 "1 3 4"
+	setup_rt_networking 3 "1 2 4"
+	setup_rt_networking 4 "1 2 3"
+
+	# set up the hosts connected to routers
+	setup_hs 1 1
+	setup_hs 2 2
+
+	# set up default SRv6 Endpoints (i.e. SRv6 End and SRv6 End.DT46)
+	setup_rt_local_sids 1 "2 3 4"
+	setup_rt_local_sids 2 "1 3 4"
+	setup_rt_local_sids 3 "1 2 4"
+	setup_rt_local_sids 4 "1 2 3"
+
+	# set up SRv6 Policies
+
+	# create an IPv6 VPN between hosts hs-1 and hs-2.
+	#
+	# Direction hs-1 -> hs-2
+	# - rt-1 encap (H.Encaps.Red)
+	# - rt-4 SRv6 End behavior (NEXT-C-SID flavor)
+	# - rt-3 SRv6 End behavior (NEXT-C-SID flavor)
+	# - rt-2 SRv6 End.DT46 behavior
+	setup_ipv6_vpn_1sid 1 2 "4 3"
+
+	# Direction hs2 -> hs-1
+	# - rt-2 encap (H.Encaps.Red)
+	# - rt-3 SRv6 End behavior (NEXT-C-SID flavor)
+	# - rt-4 SRv6 End behavior (NEXT-C-SID flavor)
+	# - rt-1 SRv6 End.DT46 behavior
+	setup_ipv6_vpn_1sid 2 1 "3 4"
+
+	# create an IPv4 VPN between hosts hs-1 and hs-2
+	#
+	# Direction hs-1 -> hs-2
+	# - rt-1 encap (H.Encaps.Red)
+	# - rt-3 SRv6 End behavior (NEXT-C-SID flavor)
+	# - rt-2 SRv6 End.DT46 behavior
+	setup_ipv4_vpn_2sids 1 2 "3"
+
+	# Direction hs-2 -> hs-1
+	# - rt-2 encap (H.Encaps.Red)
+	# - rt-3 SRv6 End behavior (NEXT-C-SID flavor)
+	# - rt-1 SRv6 End.DT46 behavior
+	setup_ipv4_vpn_2sids 2 1 "3"
+
+	# testing environment was set up successfully
+	SETUP_ERR=0
+}
+
+check_rt_connectivity()
+{
+	local rtsrc="$1"
+	local rtdst="$2"
+	local prefix
+	local rtsrc_nsname
+
+	rtsrc_nsname="$(get_rtname "${rtsrc}")"
+
+	prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")"
+
+	ip netns exec "${rtsrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
+		"${prefix}::${rtdst}" >/dev/null 2>&1
+}
+
+check_and_log_rt_connectivity()
+{
+	local rtsrc="$1"
+	local rtdst="$2"
+
+	check_rt_connectivity "${rtsrc}" "${rtdst}"
+	log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}"
+}
+
+check_hs_ipv6_connectivity()
+{
+	local hssrc="$1"
+	local hsdst="$2"
+	local hssrc_nsname
+
+	hssrc_nsname="$(get_hsname "${hssrc}")"
+
+	ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
+		"${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1
+}
+
+check_hs_ipv4_connectivity()
+{
+	local hssrc="$1"
+	local hsdst="$2"
+	local hssrc_nsname
+
+	hssrc_nsname="$(get_hsname "${hssrc}")"
+
+	ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
+		"${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1
+}
+
+check_and_log_hs2gw_connectivity()
+{
+	local hssrc="$1"
+
+	check_hs_ipv6_connectivity "${hssrc}" 254
+	log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> gw"
+
+	check_hs_ipv4_connectivity "${hssrc}" 254
+	log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> gw"
+}
+
+check_and_log_hs_ipv6_connectivity()
+{
+	local hssrc="$1"
+	local hsdst="$2"
+
+	check_hs_ipv6_connectivity "${hssrc}" "${hsdst}"
+	log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}"
+}
+
+check_and_log_hs_ipv4_connectivity()
+{
+	local hssrc="$1"
+	local hsdst="$2"
+
+	check_hs_ipv4_connectivity "${hssrc}" "${hsdst}"
+	log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}"
+}
+
+router_tests()
+{
+	local i
+	local j
+
+	log_section "IPv6 routers connectivity test"
+
+	for i in ${ROUTERS}; do
+		for j in ${ROUTERS}; do
+			if [ "${i}" -eq "${j}" ]; then
+				continue
+			fi
+
+			check_and_log_rt_connectivity "${i}" "${j}"
+		done
+	done
+}
+
+host2gateway_tests()
+{
+	local hs
+
+	log_section "IPv4/IPv6 connectivity test among hosts and gateways"
+
+	for hs in ${HOSTS}; do
+		check_and_log_hs2gw_connectivity "${hs}"
+	done
+}
+
+host_vpn_tests()
+{
+	log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv6)"
+
+	check_and_log_hs_ipv6_connectivity 1 2
+	check_and_log_hs_ipv6_connectivity 2 1
+
+	log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv4)"
+
+	check_and_log_hs_ipv4_connectivity 1 2
+	check_and_log_hs_ipv4_connectivity 2 1
+}
+
+__nextcsid_end_behavior_test()
+{
+	local nsname="$1"
+	local cmd="$2"
+	local blen="$3"
+	local flen="$4"
+	local layout=""
+
+	if [ "${blen}" != "d" ]; then
+		layout="${layout} lblen ${blen}"
+	fi
+
+	if [ "${flen}" != "d" ]; then
+		layout="${layout} nflen ${flen}"
+	fi
+
+	ip -netns "${nsname}" -6 route \
+		"${cmd}" "${CSID_CNTR_PREFIX}" \
+		table "${CSID_CNTR_RT_TABLE}" \
+		encap seg6local action End flavors next-csid ${layout} \
+		dev "${DUMMY_DEVNAME}" &>/dev/null
+
+	return "$?"
+}
+
+rt_x_nextcsid_end_behavior_test()
+{
+	local rt="$1"
+	local blen="$2"
+	local flen="$3"
+	local nsname
+	local ret
+
+	nsname="$(get_rtname "${rt}")"
+
+	__nextcsid_end_behavior_test "${nsname}" "add" "${blen}" "${flen}"
+	ret="$?"
+	__nextcsid_end_behavior_test "${nsname}" "del" "${blen}" "${flen}"
+
+	return "${ret}"
+}
+
+__parse_csid_container_cfg()
+{
+	local cfg="$1"
+	local index="$2"
+	local out
+
+	echo "${cfg}" | cut -d',' -f"${index}"
+}
+
+csid_container_cfg_tests()
+{
+	local valid
+	local blen
+	local flen
+	local cfg
+	local ret
+
+	log_section "C-SID Container config tests (legend: d='kernel default')"
+
+	for cfg in "${CSID_CONTAINER_CFGS[@]}"; do
+		blen="$(__parse_csid_container_cfg "${cfg}" 1)"
+		flen="$(__parse_csid_container_cfg "${cfg}" 2)"
+		valid="$(__parse_csid_container_cfg "${cfg}" 3)"
+
+		rt_x_nextcsid_end_behavior_test \
+			"${CSID_CNTR_RT_ID_TEST}" \
+			"${blen}" \
+			"${flen}"
+		ret="$?"
+
+		if [ "${valid}" == "y" ]; then
+			log_test "${ret}" 0 \
+				"Accept valid C-SID container cfg (lblen=${blen}, nflen=${flen})"
+		else
+			log_test "${ret}" 2 \
+				"Reject invalid C-SID container cfg (lblen=${blen}, nflen=${flen})"
+		fi
+	done
+}
+
+test_iproute2_supp_or_ksft_skip()
+{
+	if ! ip route help 2>&1 | grep -qo "next-csid"; then
+		echo "SKIP: Missing SRv6 NEXT-C-SID flavor support in iproute2"
+		exit "${ksft_skip}"
+	fi
+}
+
+test_dummy_dev_or_ksft_skip()
+{
+        local test_netns
+
+        test_netns="dummy-$(mktemp -u XXXXXXXX)"
+
+        if ! ip netns add "${test_netns}"; then
+                echo "SKIP: Cannot set up netns for testing dummy dev support"
+                exit "${ksft_skip}"
+        fi
+
+        modprobe dummy &>/dev/null || true
+        if ! ip -netns "${test_netns}" link \
+                add "${DUMMY_DEVNAME}" type dummy; then
+                echo "SKIP: dummy dev not supported"
+
+                ip netns del "${test_netns}"
+                exit "${ksft_skip}"
+        fi
+
+        ip netns del "${test_netns}"
+}
+
+test_vrf_or_ksft_skip()
+{
+	modprobe vrf &>/dev/null || true
+	if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
+		echo "SKIP: vrf sysctl does not exist"
+		exit "${ksft_skip}"
+	fi
+}
+
+if [ "$(id -u)" -ne 0 ]; then
+	echo "SKIP: Need root privileges"
+	exit "${ksft_skip}"
+fi
+
+# required programs to carry out this selftest
+test_command_or_ksft_skip ip
+test_command_or_ksft_skip ping
+test_command_or_ksft_skip sysctl
+test_command_or_ksft_skip grep
+test_command_or_ksft_skip cut
+
+test_iproute2_supp_or_ksft_skip
+test_dummy_dev_or_ksft_skip
+test_vrf_or_ksft_skip
+
+set -e
+trap cleanup EXIT
+
+setup
+set +e
+
+csid_container_cfg_tests
+
+router_tests
+host2gateway_tests
+host_vpn_tests
+
+print_log_test_results
diff --git a/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh
new file mode 100755
index 000000000000..c79cb8ede17f
--- /dev/null
+++ b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh
@@ -0,0 +1,1213 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# author: Andrea Mayer <andrea.mayer@uniroma2.it>
+# author: Paolo Lungaroni <paolo.lungaroni@uniroma2.it>
+#
+# This script is designed for testing the support of NEXT-C-SID flavor for SRv6
+# End.X behavior.
+# A basic knowledge of SRv6 architecture [1] and of the compressed SID approach
+# [2] is assumed for the reader.
+#
+# The network topology used in the selftest is depicted hereafter, composed of
+# two hosts and four routers. Hosts hs-1 and hs-2 are connected through an
+# IPv4/IPv6 L3 VPN service, offered by routers rt-1, rt-2, rt-3 and rt-4 using
+# the NEXT-C-SID flavor. The key components for such VPNs are:
+#
+#    i) The SRv6 H.Encaps/H.Encaps.Red behaviors [1] apply SRv6 Policies on
+#       traffic received by connected hosts, initiating the VPN tunnel;
+#
+#   ii) The SRv6 End.X behavior [1] (Endpoint with L3 cross connect) is a
+#       variant of SRv6 End behavior. It advances the active SID in the SID
+#       List carried by the SRH and forwards the packet to an L3 adjacency;
+#
+#  iii) The NEXT-C-SID mechanism [2] offers the possibility of encoding several
+#       SRv6 segments within a single 128-bit SID address, referred to as a
+#       Compressed SID (C-SID) container. In this way, the length of the SID
+#       List can be drastically reduced.
+#       The NEXT-C-SID is provided as a "flavor" of the SRv6 End.X behavior
+#       which advances the current C-SID (i.e. the Locator-Node Function defined
+#       in [2]) with the next one carried in the Argument, if available.
+#       When no more C-SIDs are available in the Argument, the SRv6 End.X
+#       behavior will apply the End.X function selecting the next SID in the SID
+#       List;
+#
+#   iv) The SRv6 End.DT46 behavior [1] is used for removing the SRv6 Policy and,
+#       thus, it terminates the VPN tunnel. Such a behavior is capable of
+#       handling, at the same time, both tunneled IPv4 and IPv6 traffic.
+#
+# [1] https://datatracker.ietf.org/doc/html/rfc8986
+# [2] https://datatracker.ietf.org/doc/html/draft-ietf-spring-srv6-srh-compression
+#
+#
+#               cafe::1                      cafe::2
+#              10.0.0.1                     10.0.0.2
+#             +--------+                   +--------+
+#             |        |                   |        |
+#             |  hs-1  |                   |  hs-2  |
+#             |        |                   |        |
+#             +---+----+                   +----+---+
+#    cafe::/64    |                             |      cafe::/64
+#  10.0.0.0/24    |                             |    10.0.0.0/24
+#             +---+----+                   +----+---+
+#             |        |  fcf0:0:1:2::/64  |        |
+#             |  rt-1  +-------------------+  rt-2  |
+#             |        |                   |        |
+#             +---+----+                   +----+---+
+#                 |      .               .      |
+#                 |  fcf0:0:1:3::/64   .        |
+#                 |          .       .          |
+#                 |            .   .            |
+# fcf0:0:1:4::/64 |              .              | fcf0:0:2:3::/64
+#                 |            .   .            |
+#                 |          .       .          |
+#                 |  fcf0:0:2:4::/64   .        |
+#                 |      .               .      |
+#             +---+----+                   +----+---+
+#             |        |                   |        |
+#             |  rt-4  +-------------------+  rt-3  |
+#             |        |  fcf0:0:3:4::/64  |        |
+#             +---+----+                   +----+---+
+#
+# Every fcf0:0:x:y::/64 network interconnects the SRv6 routers rt-x with rt-y in
+# the selftest network.
+#
+# Local SID/C-SID table
+# =====================
+#
+# Each SRv6 router is configured with a Local SID/C-SID table in which
+# SIDs/C-SIDs are stored. Considering an SRv6 router rt-x, SIDs/C-SIDs are
+# configured in the Local SID/C-SIDs table as follows:
+#
+#   Local SID/C-SID table for SRv6 router rt-x
+#   +-----------------------------------------------------------+
+#   |fcff:x::d46 is associated with the non-compressed SRv6     |
+#   |   End.DT46 behavior                                       |
+#   +-----------------------------------------------------------+
+#   |fcbb:0:0x00::/48 is associated with the NEXT-C-SID flavor  |
+#   |   of SRv6 End.X behavior                                  |
+#   +-----------------------------------------------------------+
+#   |fcbb:0:0x00:d46::/64 is associated with the SRv6 End.DT46  |
+#   |   behavior when NEXT-C-SID compression is turned on       |
+#   +-----------------------------------------------------------+
+#
+# The fcff::/16 prefix is reserved for implementing SRv6 services with regular
+# (non compressed) SIDs. Reachability of SIDs is ensured by proper configuration
+# of the IPv6 routing tables in the routers.
+# Similarly, the fcbb:0::/32 prefix is reserved for implementing SRv6 VPN
+# services leveraging the NEXT-C-SID compression mechanism. Indeed, the
+# fcbb:0::/32 is used for encoding the Locator-Block while the Locator-Node
+# Function is encoded with 16 bits.
+#
+# Incoming traffic classification and application of SRv6 Policies
+# ================================================================
+#
+# An SRv6 ingress router applies different SRv6 Policies to the traffic received
+# from a connected host, considering the IPv4 or IPv6 destination address.
+# SRv6 policy enforcement consists of encapsulating the received traffic into a
+# new IPv6 packet with a given SID List contained in the SRH.
+# When the SID List contains only one SID, the SRH could be omitted completely
+# and that SID is stored directly in the IPv6 Destination Address (DA) (this is
+# called "reduced" encapsulation).
+#
+# Test cases for NEXT-C-SID
+# =========================
+#
+# We consider two test cases for NEXT-C-SID: i) single SID and ii) double SID.
+#
+# In the single SID test case we have a number of segments that are all
+# contained in a single Compressed SID (C-SID) container. Therefore the
+# resulting SID List has only one SID. Using the reduced encapsulation format
+# this will result in a packet with no SRH.
+#
+# In the double SID test case we have one segment carried in a Compressed SID
+# (C-SID) container, followed by a regular (non compressed) SID. The resulting
+# SID List has two segments and it is possible to test the advance to the next
+# SID when all the C-SIDs in a C-SID container have been processed. Using the
+# reduced encapsulation format this will result in a packet with an SRH
+# containing 1 segment.
+#
+# For the single SID test case, we use the IPv6 addresses of hs-1 and hs-2, for
+# the double SID test case, we use their IPv4 addresses. This is only done to
+# simplify the test setup and avoid adding other hosts or multiple addresses on
+# the same interface of a host.
+#
+# Traffic from hs-1 to hs-2
+# -------------------------
+#
+# Packets generated from hs-1 and directed towards hs-2 are handled by rt-1
+# which applies the SRv6 Policies as follows:
+#
+#   i) IPv6 DA=cafe::2, H.Encaps.Red with SID List=fcbb:0:0300:0200:d46::
+#  ii) IPv4 DA=10.0.0.2, H.Encaps.Red with SID List=fcbb:0:0300::,fcff:2::d46
+#
+# ### i) single SID
+#
+# The router rt-1 is configured to enforce the given Policy through the SRv6
+# H.Encaps.Red behavior which avoids the presence of the SRH at all, since it
+# pushes the single SID directly in the IPv6 DA. Such a SID encodes a whole
+# C-SID container carrying several C-SIDs (e.g. 0300, 0200, etc).
+#
+# As the packet reaches the router rt-3, the enabled NEXT-C-SID SRv6 End.X
+# behavior (associated with fcbb:0:0300::/48) is triggered. This behavior
+# analyzes the IPv6 DA and checks whether the Argument of the C-SID container
+# is zero or not. In this case, the Argument is *NOT* zero and the IPv6 DA is
+# updated as follows:
+#
+# +-----------------------------------------------------------------+
+# | Before applying the rt-3 enabled NEXT-C-SID SRv6 End.X behavior |
+# +-----------------------------------------------------------------+
+# |                            +---------- Argument                 |
+# |                     vvvvvvvvvv                                  |
+# | IPv6 DA fcbb:0:0300:0200:d46::                                  |
+# |                ^^^^    <-- shifting                             |
+# |                  |                                              |
+# |          Locator-Node Function                                  |
+# +-----------------------------------------------------------------+
+# | After applying the rt-3 enabled NEXT-C-SID SRv6 End.X behavior  |
+# +-----------------------------------------------------------------+
+# |                          +---------- Argument                   |
+# |                    vvvvvv                                       |
+# | IPv6 DA fcbb:0:0200:d46::                                       |
+# |                ^^^^                                             |
+# |                  |                                              |
+# |          Locator-Node Function                                  |
+# +-----------------------------------------------------------------+
+#
+# After having applied the enabled NEXT-C-SID SRv6 End.X behavior, the packet
+# is sent to rt-4 node using the L3 adjacency address fcf0:0:3:4::4.
+#
+# The node rt-4 performs a plain IPv6 forward to the rt-2 router according to
+# its Local SID table and using the IPv6 DA fcbb:0:0200:d46:: .
+#
+# The router rt-2 is configured for decapsulating the inner IPv6 packet and,
+# for this reason, it applies the SRv6 End.DT46 behavior on the received
+# packet. It is worth noting that the SRv6 End.DT46 behavior does not require
+# the presence of the SRH: it is fully capable to operate properly on
+# IPv4/IPv6-in-IPv6 encapsulations.
+# At the end of the decap operation, the packet is sent to the host hs-2.
+#
+# ### ii) double SID
+#
+# The router rt-1 is configured to enforce the given Policy through the SRv6
+# H.Encaps.Red. As a result, the first SID fcbb:0:0300:: is stored into the
+# IPv6 DA, while the SRH pushed into the packet is made of only one SID, i.e.
+# fcff:2::d46. Hence, the packet sent by hs-1 to hs-2 is encapsulated in an
+# outer IPv6 header plus the SRH.
+#
+# As the packet reaches the node rt-3, the router applies the enabled NEXT-C-SID
+# SRv6 End.X behavior.
+#
+# +-----------------------------------------------------------------+
+# | Before applying the rt-3 enabled NEXT-C-SID SRv6 End.X behavior |
+# +-----------------------------------------------------------------+
+# |                      +---------- Argument                       |
+# |                      vvvv (Argument is all filled with zeros)   |
+# | IPv6 DA fcbb:0:0300::                                           |
+# |                ^^^^                                             |
+# |                  |                                              |
+# |          Locator-Node Function                                  |
+# +-----------------------------------------------------------------+
+# | After applying the rt-3 enabled NEXT-C-SID SRv6 End.X behavior  |
+# +-----------------------------------------------------------------+
+# |                                                                 |
+# | IPv6 DA fcff:2::d46                                             |
+# |         ^^^^^^^^^^^                                             |
+# |              |                                                  |
+# |        SID copied from the SID List contained in the SRH        |
+# +-----------------------------------------------------------------+
+#
+# Since the Argument of the C-SID container is zero, the behavior can not
+# update the Locator-Node function with the next C-SID carried in the Argument
+# itself. Thus, the enabled NEXT-C-SID SRv6 End.X behavior operates as the
+# traditional End.X behavior: it updates the IPv6 DA by copying the next
+# available SID in the SID List carried by the SRH. Next, the packet is
+# forwarded to the rt-4 node using the L3 adjacency fcf0:3:4::4 previously
+# configured for this behavior.
+#
+# The node rt-4 performs a plain IPv6 forward to the rt-2 router according to
+# its Local SID table and using the IPv6 DA fcff:2::d46.
+#
+# Once the packet is received by rt-2, the router decapsulates the inner IPv4
+# packet using the SRv6 End.DT46 behavior (associated with the SID fcff:2::d46)
+# and sends it to the host hs-2.
+#
+# Traffic from hs-2 to hs-1
+# -------------------------
+#
+# Packets generated from hs-2 and directed towards hs-1 are handled by rt-2
+# which applies the SRv6 Policies as follows:
+#
+#   i) IPv6 DA=cafe::1, SID List=fcbb:0:0400:0100:d46::
+#  ii) IPv4 DA=10.0.0.1, SID List=fcbb:0:0300::,fcff:1::d46
+#
+# ### i) single SID
+#
+# The node hs-2 sends an IPv6 packet directed to node hs-1. The router rt-2 is
+# directly connected to hs-2 and receives the packet. Rt-2 applies the
+# H.Encap.Red behavior with policy i) described above. Since there is only one
+# SID, the SRH header is omitted and the policy is inserted directly into the DA
+# of IPv6 packet.
+#
+# The packet reaches the router rt-4 and the enabled NEXT-C-SID SRv6 End.X
+# behavior (associated with fcbb:0:0400::/48) is triggered. This behavior
+# analyzes the IPv6 DA and checks whether the Argument of the C-SID container
+# is zero or not. The Argument is *NOT* zero and the C-SID in the IPv6 DA is
+# advanced. At this point, the current IPv6 DA is fcbb:0:0100:d46:: .
+# The enabled NEXT-C-SID SRv6 End.X behavior is configured with the L3 adjacency
+# fcf0:0:1:4::1, used to route traffic to the rt-1 node.
+#
+# The router rt-1 is configured for decapsulating the inner packet. It applies
+# the SRv6 End.DT46 behavior on the received packet. Decapsulation does not
+# require the presence of the SRH. At the end of the decap operation, the packet
+# is sent to the host hs-1.
+#
+# ### ii) double SID
+#
+# The router rt-2 is configured to enforce the given Policy through the SRv6
+# H.Encaps.Red. As a result, the first SID fcbb:0:0300:: is stored into the
+# IPv6 DA, while the SRH pushed into the packet is made of only one SID, i.e.
+# fcff:1::d46. Hence, the packet sent by hs-2 to hs-1 is encapsulated in an
+# outer IPv6 header plus the SRH.
+#
+# As the packet reaches the node rt-3, the enabled NEXT-C-SID SRv6 End.X
+# behavior bound to the SID fcbb:0:0300::/48 is triggered.
+# Since the Argument of the C-SID container is zero, the behavior can not
+# update the Locator-Node function with the next C-SID carried in the Argument
+# itself. Thus, the enabled NEXT-C-SID SRv6 End-X behavior operates as the
+# traditional End.X behavior: it updates the IPv6 DA by copying the next
+# available SID in the SID List carried by the SRH. After that, the packet is
+# forwarded to the rt-4 node using the L3 adjacency (fcf0:3:4::4) previously
+# configured for this behavior.
+#
+# The node rt-4 performs a plain IPv6 forward to the rt-1 router according to
+# its Local SID table, considering the IPv6 DA fcff:1::d46.
+#
+# Once the packet is received by rt-1, the router decapsulates the inner IPv4
+# packet using the SRv6 End.DT46 behavior (associated with the SID fcff:1::d46)
+# and sends it to the host hs-1.
+
+# Kselftest framework requirement - SKIP code is 4.
+readonly ksft_skip=4
+
+readonly RDMSUFF="$(mktemp -u XXXXXXXX)"
+readonly DUMMY_DEVNAME="dum0"
+readonly VRF_TID=100
+readonly VRF_DEVNAME="vrf-${VRF_TID}"
+readonly RT2HS_DEVNAME="veth-t${VRF_TID}"
+readonly LOCALSID_TABLE_ID=90
+readonly IPv6_RT_NETWORK=fcf0:0
+readonly IPv6_HS_NETWORK=cafe
+readonly IPv4_HS_NETWORK=10.0.0
+readonly VPN_LOCATOR_SERVICE=fcff
+readonly DT46_FUNC=0d46
+readonly HEADEND_ENCAP="encap.red"
+
+# do not add ':' as separator
+readonly LCBLOCK_ADDR=fcbb0000
+readonly LCBLOCK_BLEN=32
+# do not add ':' as separator
+readonly LCNODEFUNC_FMT="0%d00"
+readonly LCNODEFUNC_BLEN=16
+
+readonly LCBLOCK_NODEFUNC_BLEN=$((LCBLOCK_BLEN + LCNODEFUNC_BLEN))
+
+readonly CSID_CNTR_PREFIX="dead:beaf::/32"
+# ID of the router used for testing the C-SID container cfgs
+readonly CSID_CNTR_RT_ID_TEST=1
+# Routing table used for testing the C-SID container cfgs
+readonly CSID_CNTR_RT_TABLE=91
+
+# C-SID container configurations to be tested
+#
+# An entry of the array is defined as "a,b,c" where:
+# - 'a' and 'b' elements represent respectively the Locator-Block length
+#   (lblen) in bits and the Locator-Node Function length (nflen) in bits.
+#   'a' and 'b' can be set to default values using the placeholder "d" which
+#   indicates the default kernel values (32 for lblen and 16 for nflen);
+#   otherwise, any numeric value is accepted;
+# - 'c' indicates whether the C-SID configuration provided by the values 'a'
+#   and 'b' should be considered valid ("y") or invalid ("n").
+declare -ra CSID_CONTAINER_CFGS=(
+	"d,d,y"
+	"d,16,y"
+	"16,d,y"
+	"16,32,y"
+	"32,16,y"
+	"48,8,y"
+	"8,48,y"
+	"d,0,n"
+	"0,d,n"
+	"32,0,n"
+	"0,32,n"
+	"17,d,n"
+	"d,17,n"
+	"120,16,n"
+	"16,120,n"
+	"0,128,n"
+	"128,0,n"
+	"130,0,n"
+	"0,130,n"
+	"0,0,n"
+)
+
+PING_TIMEOUT_SEC=4
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+# IDs of routers and hosts are initialized during the setup of the testing
+# network
+ROUTERS=''
+HOSTS=''
+
+SETUP_ERR=1
+
+ret=${ksft_skip}
+nsuccess=0
+nfail=0
+
+log_test()
+{
+	local rc="$1"
+	local expected="$2"
+	local msg="$3"
+
+	if [ "${rc}" -eq "${expected}" ]; then
+		nsuccess=$((nsuccess+1))
+		printf "\n    TEST: %-60s  [ OK ]\n" "${msg}"
+	else
+		ret=1
+		nfail=$((nfail+1))
+		printf "\n    TEST: %-60s  [FAIL]\n" "${msg}"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+			echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	fi
+}
+
+print_log_test_results()
+{
+	printf "\nTests passed: %3d\n" "${nsuccess}"
+	printf "Tests failed: %3d\n"   "${nfail}"
+
+	# when a test fails, the value of 'ret' is set to 1 (error code).
+	# Conversely, when all tests are passed successfully, the 'ret' value
+	# is set to 0 (success code).
+	if [ "${ret}" -ne 1 ]; then
+		ret=0
+	fi
+}
+
+log_section()
+{
+	echo
+	echo "################################################################################"
+	echo "TEST SECTION: $*"
+	echo "################################################################################"
+}
+
+test_command_or_ksft_skip()
+{
+	local cmd="$1"
+
+	if [ ! -x "$(command -v "${cmd}")" ]; then
+		echo "SKIP: Could not run test without \"${cmd}\" tool";
+		exit "${ksft_skip}"
+	fi
+}
+
+get_nodename()
+{
+	local name="$1"
+
+	echo "${name}-${RDMSUFF}"
+}
+
+get_rtname()
+{
+	local rtid="$1"
+
+	get_nodename "rt-${rtid}"
+}
+
+get_hsname()
+{
+	local hsid="$1"
+
+	get_nodename "hs-${hsid}"
+}
+
+__create_namespace()
+{
+	local name="$1"
+
+	ip netns add "${name}"
+}
+
+create_router()
+{
+	local rtid="$1"
+	local nsname
+
+	nsname="$(get_rtname "${rtid}")"
+
+	__create_namespace "${nsname}"
+
+	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
+	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
+	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1
+
+	ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0
+	ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0
+	ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1
+}
+
+create_host()
+{
+	local hsid="$1"
+	local nsname
+
+	nsname="$(get_hsname "${hsid}")"
+
+	__create_namespace "${nsname}"
+}
+
+cleanup()
+{
+	local nsname
+	local i
+
+	# destroy routers
+	for i in ${ROUTERS}; do
+		nsname="$(get_rtname "${i}")"
+
+		ip netns del "${nsname}" &>/dev/null || true
+	done
+
+	# destroy hosts
+	for i in ${HOSTS}; do
+		nsname="$(get_hsname "${i}")"
+
+		ip netns del "${nsname}" &>/dev/null || true
+	done
+
+	# check whether the setup phase was completed successfully or not. In
+	# case of an error during the setup phase of the testing environment,
+	# the selftest is considered as "skipped".
+	if [ "${SETUP_ERR}" -ne 0 ]; then
+		echo "SKIP: Setting up the testing environment failed"
+		exit "${ksft_skip}"
+	fi
+
+	exit "${ret}"
+}
+
+add_link_rt_pairs()
+{
+	local rt="$1"
+	local rt_neighs="$2"
+	local neigh
+	local nsname
+	local neigh_nsname
+
+	nsname="$(get_rtname "${rt}")"
+
+	for neigh in ${rt_neighs}; do
+		neigh_nsname="$(get_rtname "${neigh}")"
+
+		ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \
+			type veth peer name "veth-rt-${neigh}-${rt}" \
+			netns "${neigh_nsname}"
+	done
+}
+
+get_network_prefix()
+{
+	local rt="$1"
+	local neigh="$2"
+	local p="${rt}"
+	local q="${neigh}"
+
+	if [ "${p}" -gt "${q}" ]; then
+		p="${q}"; q="${rt}"
+	fi
+
+	echo "${IPv6_RT_NETWORK}:${p}:${q}"
+}
+
+# Setup the basic networking for the routers
+setup_rt_networking()
+{
+	local rt="$1"
+	local rt_neighs="$2"
+	local nsname
+	local net_prefix
+	local devname
+	local neigh
+
+	nsname="$(get_rtname "${rt}")"
+
+	for neigh in ${rt_neighs}; do
+		devname="veth-rt-${rt}-${neigh}"
+
+		net_prefix="$(get_network_prefix "${rt}" "${neigh}")"
+
+		ip -netns "${nsname}" addr \
+			add "${net_prefix}::${rt}/64" dev "${devname}" nodad
+
+		ip -netns "${nsname}" link set "${devname}" up
+	done
+
+        ip -netns "${nsname}" link add "${DUMMY_DEVNAME}" type dummy
+
+        ip -netns "${nsname}" link set "${DUMMY_DEVNAME}" up
+	ip -netns "${nsname}" link set lo up
+}
+
+# build an ipv6 prefix/address based on the input string
+# Note that the input string does not contain ':' and '::' which are considered
+# to be implicit.
+# e.g.:
+#  - input:  fbcc00000400300
+#  - output: fbcc:0000:0400:0300:0000:0000:0000:0000
+#                                ^^^^^^^^^^^^^^^^^^^
+#                              fill the address with 0s
+build_ipv6_addr()
+{
+	local addr="$1"
+	local out=""
+	local strlen="${#addr}"
+	local padn
+	local i
+
+	# add ":" every 4 digits (16 bits)
+	for (( i = 0; i < strlen; i++ )); do
+		if (( i > 0 && i < 32 && (i % 4) == 0 )); then
+			out="${out}:"
+		fi
+
+		out="${out}${addr:$i:1}"
+	done
+
+	# fill the remaining bits of the address with 0s
+	padn=$((32 - strlen))
+	for (( i = padn; i > 0; i-- )); do
+		if (( i > 0 && i < 32 && (i % 4) == 0 )); then
+			out="${out}:"
+		fi
+
+		out="${out}0"
+	done
+
+	printf "${out}"
+}
+
+build_csid()
+{
+	local nodeid="$1"
+
+	printf "${LCNODEFUNC_FMT}" "${nodeid}"
+}
+
+build_lcnode_func_prefix()
+{
+	local nodeid="$1"
+	local lcnodefunc
+	local prefix
+	local out
+
+	lcnodefunc="$(build_csid "${nodeid}")"
+	prefix="$(build_ipv6_addr "${LCBLOCK_ADDR}${lcnodefunc}")"
+
+	out="${prefix}/${LCBLOCK_NODEFUNC_BLEN}"
+
+	echo "${out}"
+}
+
+set_end_x_nextcsid()
+{
+	local rt="$1"
+	local adj="$2"
+
+	nsname="$(get_rtname "${rt}")"
+	net_prefix="$(get_network_prefix "${rt}" "${adj}")"
+	lcnode_func_prefix="$(build_lcnode_func_prefix "${rt}")"
+
+	# enabled NEXT-C-SID SRv6 End.X behavior (note that "dev" is the dummy
+	# dum0 device chosen for the sake of simplicity).
+	ip -netns "${nsname}" -6 route \
+		replace "${lcnode_func_prefix}" \
+		table "${LOCALSID_TABLE_ID}" \
+		encap seg6local action End.X nh6 "${net_prefix}::${adj}" \
+		flavors next-csid lblen "${LCBLOCK_BLEN}" \
+		nflen "${LCNODEFUNC_BLEN}" dev "${DUMMY_DEVNAME}"
+}
+
+set_underlay_sids_reachability()
+{
+	local rt="$1"
+	local rt_neighs="$2"
+
+	nsname="$(get_rtname "${rt}")"
+
+	for neigh in ${rt_neighs}; do
+		devname="veth-rt-${rt}-${neigh}"
+
+		net_prefix="$(get_network_prefix "${rt}" "${neigh}")"
+
+		# set underlay network routes for SIDs reachability
+		ip -netns "${nsname}" -6 route \
+			replace "${VPN_LOCATOR_SERVICE}:${neigh}::/32" \
+			table "${LOCALSID_TABLE_ID}" \
+			via "${net_prefix}::${neigh}" dev "${devname}"
+
+		# set the underlay network for C-SIDs reachability
+		lcnode_func_prefix="$(build_lcnode_func_prefix "${neigh}")"
+
+		ip -netns "${nsname}" -6 route \
+			replace "${lcnode_func_prefix}" \
+			table "${LOCALSID_TABLE_ID}" \
+			via "${net_prefix}::${neigh}" dev "${devname}"
+	done
+}
+
+# Setup local SIDs for an SRv6 router
+setup_rt_local_sids()
+{
+	local rt="$1"
+	local rt_neighs="$2"
+	local net_prefix
+	local devname
+	local nsname
+	local neigh
+	local lcnode_func_prefix
+	local lcblock_prefix
+
+	nsname="$(get_rtname "${rt}")"
+
+        set_underlay_sids_reachability "${rt}" "${rt_neighs}"
+
+	# all SIDs for VPNs start with a common locator. Routes and SRv6
+	# Endpoint behavior instaces are grouped together in the 'localsid'
+	# table.
+	ip -netns "${nsname}" -6 rule \
+		add to "${VPN_LOCATOR_SERVICE}::/16" \
+		lookup "${LOCALSID_TABLE_ID}" prio 999
+
+	# common locator block for NEXT-C-SIDS compression mechanism.
+	lcblock_prefix="$(build_ipv6_addr "${LCBLOCK_ADDR}")"
+	ip -netns "${nsname}" -6 rule \
+		add to "${lcblock_prefix}/${LCBLOCK_BLEN}" \
+		lookup "${LOCALSID_TABLE_ID}" prio 999
+}
+
+# build and install the SRv6 policy into the ingress SRv6 router as well as the
+# decap SID in the egress one.
+# args:
+#  $1 - src host (evaluate automatically the ingress router)
+#  $2 - dst host (evaluate automatically the egress router)
+#  $3 - SRv6 routers configured for steering traffic (End.X behaviors)
+#  $4 - single SID or double SID
+#  $5 - traffic type (IPv6 or IPv4)
+__setup_l3vpn()
+{
+	local src="$1"
+	local dst="$2"
+	local end_rts="$3"
+	local mode="$4"
+	local traffic="$5"
+	local nsname
+	local policy
+	local container
+	local decapsid
+	local lcnfunc
+	local dt
+	local n
+	local rtsrc_nsname
+	local rtdst_nsname
+
+	rtsrc_nsname="$(get_rtname "${src}")"
+	rtdst_nsname="$(get_rtname "${dst}")"
+
+	container="${LCBLOCK_ADDR}"
+
+	# build first SID (C-SID container)
+	for n in ${end_rts}; do
+		lcnfunc="$(build_csid "${n}")"
+
+		container="${container}${lcnfunc}"
+	done
+
+	if [ "${mode}" -eq 1 ]; then
+		# single SID policy
+		dt="$(build_csid "${dst}")${DT46_FUNC}"
+		container="${container}${dt}"
+		# build the full ipv6 address for the container
+		policy="$(build_ipv6_addr "${container}")"
+
+		# build the decap SID used in the decap node
+		container="${LCBLOCK_ADDR}${dt}"
+		decapsid="$(build_ipv6_addr "${container}")"
+	else
+		# double SID policy
+		decapsid="${VPN_LOCATOR_SERVICE}:${dst}::${DT46_FUNC}"
+
+		policy="$(build_ipv6_addr "${container}"),${decapsid}"
+	fi
+
+	# apply encap policy
+	if [ "${traffic}" -eq 6 ]; then
+		ip -netns "${rtsrc_nsname}" -6 route \
+			add "${IPv6_HS_NETWORK}::${dst}" vrf "${VRF_DEVNAME}" \
+			encap seg6 mode "${HEADEND_ENCAP}" segs "${policy}" \
+			dev "${VRF_DEVNAME}"
+
+		ip -netns "${rtsrc_nsname}" -6 neigh \
+			add proxy "${IPv6_HS_NETWORK}::${dst}" \
+			dev "${RT2HS_DEVNAME}"
+	else
+		# "dev" must be different from the one where the packet is
+		# received, otherwise the proxy arp does not work.
+		ip -netns "${rtsrc_nsname}" -4 route \
+			add "${IPv4_HS_NETWORK}.${dst}" vrf "${VRF_DEVNAME}" \
+			encap seg6 mode "${HEADEND_ENCAP}" segs "${policy}" \
+			dev "${VRF_DEVNAME}"
+	fi
+
+	# apply decap
+	# Local End.DT46 behavior (decap)
+	ip -netns "${rtdst_nsname}" -6 route \
+		add "${decapsid}" \
+		table "${LOCALSID_TABLE_ID}" \
+		encap seg6local action End.DT46 vrftable "${VRF_TID}" \
+		dev "${VRF_DEVNAME}"
+}
+
+# see __setup_l3vpn()
+setup_ipv4_vpn_2sids()
+{
+	__setup_l3vpn "$1" "$2" "$3" 2 4
+}
+
+# see __setup_l3vpn()
+setup_ipv6_vpn_1sid()
+{
+	__setup_l3vpn "$1" "$2" "$3" 1 6
+}
+
+setup_hs()
+{
+	local hs="$1"
+	local rt="$2"
+	local hsname
+	local rtname
+
+	hsname="$(get_hsname "${hs}")"
+	rtname="$(get_rtname "${rt}")"
+
+	ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
+	ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+	ip -netns "${hsname}" link add veth0 type veth \
+		peer name "${RT2HS_DEVNAME}" netns "${rtname}"
+
+	ip -netns "${hsname}" addr \
+		add "${IPv6_HS_NETWORK}::${hs}/64" dev veth0 nodad
+	ip -netns "${hsname}" addr add "${IPv4_HS_NETWORK}.${hs}/24" dev veth0
+
+	ip -netns "${hsname}" link set veth0 up
+	ip -netns "${hsname}" link set lo up
+
+	# configure the VRF on the router which is directly connected to the
+	# source host.
+	ip -netns "${rtname}" link \
+		add "${VRF_DEVNAME}" type vrf table "${VRF_TID}"
+	ip -netns "${rtname}" link set "${VRF_DEVNAME}" up
+
+	# enslave the veth interface connecting the router with the host to the
+	# VRF in the access router
+	ip -netns "${rtname}" link \
+		set "${RT2HS_DEVNAME}" master "${VRF_DEVNAME}"
+
+	# set default routes to unreachable for both ipv6 and ipv4
+	ip -netns "${rtname}" -6 route \
+		add unreachable default metric 4278198272 \
+		vrf "${VRF_DEVNAME}"
+	ip -netns "${rtname}" -4 route \
+		add unreachable default metric 4278198272 \
+		vrf "${VRF_DEVNAME}"
+
+	ip -netns "${rtname}" addr \
+		add "${IPv6_HS_NETWORK}::254/64" dev "${RT2HS_DEVNAME}" nodad
+	ip -netns "${rtname}" addr \
+		add "${IPv4_HS_NETWORK}.254/24" dev "${RT2HS_DEVNAME}"
+
+	ip -netns "${rtname}" link set "${RT2HS_DEVNAME}" up
+
+	ip netns exec "${rtname}" \
+		sysctl -wq net.ipv6.conf."${RT2HS_DEVNAME}".proxy_ndp=1
+	ip netns exec "${rtname}" \
+		sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1
+
+	# disable the rp_filter otherwise the kernel gets confused about how
+	# to route decap ipv4 packets.
+	ip netns exec "${rtname}" \
+		sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0
+
+	ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
+}
+
+setup()
+{
+	local i
+
+	# create routers
+	ROUTERS="1 2 3 4"; readonly ROUTERS
+	for i in ${ROUTERS}; do
+		create_router "${i}"
+	done
+
+	# create hosts
+	HOSTS="1 2"; readonly HOSTS
+	for i in ${HOSTS}; do
+		create_host "${i}"
+	done
+
+	# set up the links for connecting routers
+	add_link_rt_pairs 1 "2 3 4"
+	add_link_rt_pairs 2 "3 4"
+	add_link_rt_pairs 3 "4"
+
+	# set up the basic connectivity of routers and routes required for
+	# reachability of SIDs.
+	setup_rt_networking 1 "2 3 4"
+	setup_rt_networking 2 "1 3 4"
+	setup_rt_networking 3 "1 2 4"
+	setup_rt_networking 4 "1 2 3"
+
+	# set up the hosts connected to routers
+	setup_hs 1 1
+	setup_hs 2 2
+
+	# set up default SRv6 Endpoints (i.e. SRv6 End and SRv6 End.DT46)
+	setup_rt_local_sids 1 "2 3 4"
+	setup_rt_local_sids 2 "1 3 4"
+	setup_rt_local_sids 3 "1 2 4"
+	setup_rt_local_sids 4 "1 2 3"
+
+	# set up SRv6 Policies
+
+	# create an IPv6 VPN between hosts hs-1 and hs-2.
+	#
+	# Direction hs-1 -> hs-2
+	# - rt-1 encap (H.Encaps.Red)
+	# - rt-3 SRv6 End.X behavior adj rt-4 (NEXT-C-SID flavor)
+	# - rt-4 Plain IPv6 Forwarding to rt-2
+	# - rt-2 SRv6 End.DT46 behavior
+	setup_ipv6_vpn_1sid 1 2 "3"
+
+	# Direction hs2 -> hs-1
+	# - rt-2 encap (H.Encaps.Red)
+	# - rt-4 SRv6 End.X behavior adj rt-1 (NEXT-C-SID flavor)
+	# - rt-1 SRv6 End.DT46 behavior
+	setup_ipv6_vpn_1sid 2 1 "4"
+
+	# create an IPv4 VPN between hosts hs-1 and hs-2
+	#
+	# Direction hs-1 -> hs-2
+	# - rt-1 encap (H.Encaps.Red)
+	# - rt-3 SRv6 End.X behavior adj rt-4 (NEXT-C-SID flavor)
+	# - rt-4 Plain IPv6 Forwarding to rt-2
+	# - rt-2 SRv6 End.DT46 behavior
+	setup_ipv4_vpn_2sids 1 2 "3"
+
+	# Direction hs-2 -> hs-1
+	# - rt-2 encap (H.Encaps.Red)
+	# - rt-3 SRv6 End.X behavior adj rt-4 (NEXT-C-SID flavor)
+	# - rt-4 Plain IPv6 Forwarding to rt-1
+	# - rt-1 SRv6 End.DT46 behavior
+	setup_ipv4_vpn_2sids 2 1 "3"
+
+	# Setup the adjacencies in the SRv6 aware routers
+	# - rt-3 SRv6 End.X adjacency with rt-4
+	# - rt-4 SRv6 End.X adjacency with rt-1
+        set_end_x_nextcsid 3 4
+        set_end_x_nextcsid 4 1
+
+	# testing environment was set up successfully
+	SETUP_ERR=0
+}
+
+check_rt_connectivity()
+{
+	local rtsrc="$1"
+	local rtdst="$2"
+	local prefix
+	local rtsrc_nsname
+
+	rtsrc_nsname="$(get_rtname "${rtsrc}")"
+
+	prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")"
+
+	ip netns exec "${rtsrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
+		"${prefix}::${rtdst}" >/dev/null 2>&1
+}
+
+check_and_log_rt_connectivity()
+{
+	local rtsrc="$1"
+	local rtdst="$2"
+
+	check_rt_connectivity "${rtsrc}" "${rtdst}"
+	log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}"
+}
+
+check_hs_ipv6_connectivity()
+{
+	local hssrc="$1"
+	local hsdst="$2"
+	local hssrc_nsname
+
+	hssrc_nsname="$(get_hsname "${hssrc}")"
+
+	ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
+		"${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1
+}
+
+check_hs_ipv4_connectivity()
+{
+	local hssrc="$1"
+	local hsdst="$2"
+	local hssrc_nsname
+
+	hssrc_nsname="$(get_hsname "${hssrc}")"
+
+	ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
+		"${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1
+}
+
+check_and_log_hs2gw_connectivity()
+{
+	local hssrc="$1"
+
+	check_hs_ipv6_connectivity "${hssrc}" 254
+	log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> gw"
+
+	check_hs_ipv4_connectivity "${hssrc}" 254
+	log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> gw"
+}
+
+check_and_log_hs_ipv6_connectivity()
+{
+	local hssrc="$1"
+	local hsdst="$2"
+
+	check_hs_ipv6_connectivity "${hssrc}" "${hsdst}"
+	log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}"
+}
+
+check_and_log_hs_ipv4_connectivity()
+{
+	local hssrc="$1"
+	local hsdst="$2"
+
+	check_hs_ipv4_connectivity "${hssrc}" "${hsdst}"
+	log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}"
+}
+
+router_tests()
+{
+	local i
+	local j
+
+	log_section "IPv6 routers connectivity test"
+
+	for i in ${ROUTERS}; do
+		for j in ${ROUTERS}; do
+			if [ "${i}" -eq "${j}" ]; then
+				continue
+			fi
+
+			check_and_log_rt_connectivity "${i}" "${j}"
+		done
+	done
+}
+
+host2gateway_tests()
+{
+	local hs
+
+	log_section "IPv4/IPv6 connectivity test among hosts and gateways"
+
+	for hs in ${HOSTS}; do
+		check_and_log_hs2gw_connectivity "${hs}"
+	done
+}
+
+host_vpn_tests()
+{
+	log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv6)"
+
+	check_and_log_hs_ipv6_connectivity 1 2
+	check_and_log_hs_ipv6_connectivity 2 1
+
+	log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv4)"
+
+	check_and_log_hs_ipv4_connectivity 1 2
+	check_and_log_hs_ipv4_connectivity 2 1
+}
+
+__nextcsid_end_x_behavior_test()
+{
+	local nsname="$1"
+	local cmd="$2"
+	local blen="$3"
+	local flen="$4"
+	local layout=""
+
+	if [ "${blen}" != "d" ]; then
+		layout="${layout} lblen ${blen}"
+	fi
+
+	if [ "${flen}" != "d" ]; then
+		layout="${layout} nflen ${flen}"
+	fi
+
+	ip -netns "${nsname}" -6 route \
+		"${cmd}" "${CSID_CNTR_PREFIX}" \
+		table "${CSID_CNTR_RT_TABLE}" \
+		encap seg6local action End.X nh6 :: \
+		flavors next-csid ${layout} \
+		dev "${DUMMY_DEVNAME}" &>/dev/null
+
+	return "$?"
+}
+
+rt_x_nextcsid_end_x_behavior_test()
+{
+	local rt="$1"
+	local blen="$2"
+	local flen="$3"
+	local nsname
+	local ret
+
+	nsname="$(get_rtname "${rt}")"
+
+	__nextcsid_end_x_behavior_test "${nsname}" "add" "${blen}" "${flen}"
+	ret="$?"
+	__nextcsid_end_x_behavior_test "${nsname}" "del" "${blen}" "${flen}"
+
+	return "${ret}"
+}
+
+__parse_csid_container_cfg()
+{
+	local cfg="$1"
+	local index="$2"
+	local out
+
+	echo "${cfg}" | cut -d',' -f"${index}"
+}
+
+csid_container_cfg_tests()
+{
+	local valid
+	local blen
+	local flen
+	local cfg
+	local ret
+
+	log_section "C-SID Container config tests (legend: d='kernel default')"
+
+	for cfg in "${CSID_CONTAINER_CFGS[@]}"; do
+		blen="$(__parse_csid_container_cfg "${cfg}" 1)"
+		flen="$(__parse_csid_container_cfg "${cfg}" 2)"
+		valid="$(__parse_csid_container_cfg "${cfg}" 3)"
+
+		rt_x_nextcsid_end_x_behavior_test \
+			"${CSID_CNTR_RT_ID_TEST}" \
+			"${blen}" \
+			"${flen}"
+		ret="$?"
+
+		if [ "${valid}" == "y" ]; then
+			log_test "${ret}" 0 \
+				"Accept valid C-SID container cfg (lblen=${blen}, nflen=${flen})"
+		else
+			log_test "${ret}" 2 \
+				"Reject invalid C-SID container cfg (lblen=${blen}, nflen=${flen})"
+		fi
+	done
+}
+
+test_iproute2_supp_or_ksft_skip()
+{
+	if ! ip route help 2>&1 | grep -qo "next-csid"; then
+		echo "SKIP: Missing SRv6 NEXT-C-SID flavor support in iproute2"
+		exit "${ksft_skip}"
+	fi
+}
+
+test_dummy_dev_or_ksft_skip()
+{
+        local test_netns
+
+        test_netns="dummy-$(mktemp -u XXXXXXXX)"
+
+        if ! ip netns add "${test_netns}"; then
+                echo "SKIP: Cannot set up netns for testing dummy dev support"
+                exit "${ksft_skip}"
+        fi
+
+        modprobe dummy &>/dev/null || true
+        if ! ip -netns "${test_netns}" link \
+                add "${DUMMY_DEVNAME}" type dummy; then
+                echo "SKIP: dummy dev not supported"
+
+                ip netns del "${test_netns}"
+                exit "${ksft_skip}"
+        fi
+
+        ip netns del "${test_netns}"
+}
+
+test_vrf_or_ksft_skip()
+{
+	modprobe vrf &>/dev/null || true
+	if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
+		echo "SKIP: vrf sysctl does not exist"
+		exit "${ksft_skip}"
+	fi
+}
+
+if [ "$(id -u)" -ne 0 ]; then
+	echo "SKIP: Need root privileges"
+	exit "${ksft_skip}"
+fi
+
+# required programs to carry out this selftest
+test_command_or_ksft_skip ip
+test_command_or_ksft_skip ping
+test_command_or_ksft_skip sysctl
+test_command_or_ksft_skip grep
+test_command_or_ksft_skip cut
+
+test_iproute2_supp_or_ksft_skip
+test_dummy_dev_or_ksft_skip
+test_vrf_or_ksft_skip
+
+set -e
+trap cleanup EXIT
+
+setup
+set +e
+
+csid_container_cfg_tests
+
+router_tests
+host2gateway_tests
+host_vpn_tests
+
+print_log_test_results
diff --git a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh
new file mode 100755
index 000000000000..28a775654b92
--- /dev/null
+++ b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh
@@ -0,0 +1,879 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# author: Andrea Mayer <andrea.mayer@uniroma2.it>
+#
+# This script is designed for testing the SRv6 H.Encaps.Red behavior.
+#
+# Below is depicted the IPv6 network of an operator which offers advanced
+# IPv4/IPv6 VPN services to hosts, enabling them to communicate with each
+# other.
+# In this example, hosts hs-1 and hs-2 are connected through an IPv4/IPv6 VPN
+# service, while hs-3 and hs-4 are connected using an IPv6 only VPN.
+#
+# Routers rt-1,rt-2,rt-3 and rt-4 implement IPv4/IPv6 L3 VPN services
+# leveraging the SRv6 architecture. The key components for such VPNs are:
+#
+#   i) The SRv6 H.Encaps.Red behavior applies SRv6 Policies on traffic received
+#      by connected hosts, initiating the VPN tunnel. Such a behavior is an
+#      optimization of the SRv6 H.Encap aiming to reduce the length of the SID
+#      List carried in the pushed SRH. Specifically, the H.Encaps.Red removes
+#      the first SID contained in the SID List (i.e. SRv6 Policy) by storing it
+#      into the IPv6 Destination Address. When a SRv6 Policy is made of only one
+#      SID, the SRv6 H.Encaps.Red behavior omits the SRH at all and pushes that
+#      SID directly into the IPv6 DA;
+#
+#  ii) The SRv6 End behavior advances the active SID in the SID List carried by
+#      the SRH;
+#
+# iii) The SRv6 End.DT46 behavior is used for removing the SRv6 Policy and,
+#      thus, it terminates the VPN tunnel. Such a behavior is capable of
+#      handling, at the same time, both tunneled IPv4 and IPv6 traffic.
+#
+#
+#               cafe::1                      cafe::2
+#              10.0.0.1                     10.0.0.2
+#             +--------+                   +--------+
+#             |        |                   |        |
+#             |  hs-1  |                   |  hs-2  |
+#             |        |                   |        |
+#             +---+----+                   +--- +---+
+#    cafe::/64    |                             |      cafe::/64
+#  10.0.0.0/24    |                             |    10.0.0.0/24
+#             +---+----+                   +----+---+
+#             |        |  fcf0:0:1:2::/64  |        |
+#             |  rt-1  +-------------------+  rt-2  |
+#             |        |                   |        |
+#             +---+----+                   +----+---+
+#                 |      .               .      |
+#                 |  fcf0:0:1:3::/64   .        |
+#                 |          .       .          |
+#                 |            .   .            |
+# fcf0:0:1:4::/64 |              .              | fcf0:0:2:3::/64
+#                 |            .   .            |
+#                 |          .       .          |
+#                 |  fcf0:0:2:4::/64   .        |
+#                 |      .               .      |
+#             +---+----+                   +----+---+
+#             |        |                   |        |
+#             |  rt-4  +-------------------+  rt-3  |
+#             |        |  fcf0:0:3:4::/64  |        |
+#             +---+----+                   +----+---+
+#    cafe::/64    |                             |      cafe::/64
+#  10.0.0.0/24    |                             |    10.0.0.0/24
+#             +---+----+                   +--- +---+
+#             |        |                   |        |
+#             |  hs-4  |                   |  hs-3  |
+#             |        |                   |        |
+#             +--------+                   +--------+
+#               cafe::4                      cafe::3
+#              10.0.0.4                     10.0.0.3
+#
+#
+# Every fcf0:0:x:y::/64 network interconnects the SRv6 routers rt-x with rt-y
+# in the IPv6 operator network.
+#
+# Local SID table
+# ===============
+#
+# Each SRv6 router is configured with a Local SID table in which SIDs are
+# stored. Considering the given SRv6 router rt-x, at least two SIDs are
+# configured in the Local SID table:
+#
+#   Local SID table for SRv6 router rt-x
+#   +----------------------------------------------------------+
+#   |fcff:x::e is associated with the SRv6 End behavior        |
+#   |fcff:x::d46 is associated with the SRv6 End.DT46 behavior |
+#   +----------------------------------------------------------+
+#
+# The fcff::/16 prefix is reserved by the operator for implementing SRv6 VPN
+# services. Reachability of SIDs is ensured by proper configuration of the IPv6
+# operator's network and SRv6 routers.
+#
+# # SRv6 Policies
+# ===============
+#
+# An SRv6 ingress router applies SRv6 policies to the traffic received from a
+# connected host. SRv6 policy enforcement consists of encapsulating the
+# received traffic into a new IPv6 packet with a given SID List contained in
+# the SRH.
+#
+# IPv4/IPv6 VPN between hs-1 and hs-2
+# -----------------------------------
+#
+# Hosts hs-1 and hs-2 are connected using dedicated IPv4/IPv6 VPNs.
+# Specifically, packets generated from hs-1 and directed towards hs-2 are
+# handled by rt-1 which applies the following SRv6 Policies:
+#
+#   i.a) IPv6 traffic, SID List=fcff:3::e,fcff:4::e,fcff:2::d46
+#  ii.a) IPv4 traffic, SID List=fcff:2::d46
+#
+# Policy (i.a) steers tunneled IPv6 traffic through SRv6 routers
+# rt-3,rt-4,rt-2. Instead, Policy (ii.a) steers tunneled IPv4 traffic through
+# rt-2.
+# The H.Encaps.Red reduces the SID List (i.a) carried in SRH by removing the
+# first SID (fcff:3::e) and pushing it into the IPv6 DA. In case of IPv4
+# traffic, the H.Encaps.Red omits the presence of SRH at all, since the SID
+# List (ii.a) consists of only one SID that can be stored directly in the IPv6
+# DA.
+#
+# On the reverse path (i.e. from hs-2 to hs-1), rt-2 applies the following
+# policies:
+#
+#   i.b) IPv6 traffic, SID List=fcff:1::d46
+#  ii.b) IPv4 traffic, SID List=fcff:4::e,fcff:3::e,fcff:1::d46
+#
+# Policy (i.b) steers tunneled IPv6 traffic through the SRv6 router rt-1.
+# Conversely, Policy (ii.b) steers tunneled IPv4 traffic through SRv6 routers
+# rt-4,rt-3,rt-1.
+# The H.Encaps.Red omits the SRH at all in case of (i.b) by pushing the single
+# SID (fcff::1::d46) inside the IPv6 DA.
+# The H.Encaps.Red reduces the SID List (ii.b) in the SRH by removing the first
+# SID (fcff:4::e) and pushing it into the IPv6 DA.
+#
+# In summary:
+#  hs-1->hs-2 |IPv6 DA=fcff:3::e|SRH SIDs=fcff:4::e,fcff:2::d46|IPv6|...| (i.a)
+#  hs-1->hs-2 |IPv6 DA=fcff:2::d46|IPv4|...|                              (ii.a)
+#
+#  hs-2->hs-1 |IPv6 DA=fcff:1::d46|IPv6|...|                              (i.b)
+#  hs-2->hs-1 |IPv6 DA=fcff:4::e|SRH SIDs=fcff:3::e,fcff:1::d46|IPv4|...| (ii.b)
+#
+#
+# IPv6 VPN between hs-3 and hs-4
+# ------------------------------
+#
+# Hosts hs-3 and hs-4 are connected using a dedicated IPv6 only VPN.
+# Specifically, packets generated from hs-3 and directed towards hs-4 are
+# handled by rt-3 which applies the following SRv6 Policy:
+#
+#  i.c) IPv6 traffic, SID List=fcff:2::e,fcff:4::d46
+#
+# Policy (i.c) steers tunneled IPv6 traffic through SRv6 routers rt-2,rt-4.
+# The H.Encaps.Red reduces the SID List (i.c) carried in SRH by pushing the
+# first SID (fcff:2::e) in the IPv6 DA.
+#
+# On the reverse path (i.e. from hs-4 to hs-3) the router rt-4 applies the
+# following SRv6 Policy:
+#
+#  i.d) IPv6 traffic, SID List=fcff:1::e,fcff:3::d46.
+#
+# Policy (i.d) steers tunneled IPv6 traffic through SRv6 routers rt-1,rt-3.
+# The H.Encaps.Red reduces the SID List (i.d) carried in SRH by pushing the
+# first SID (fcff:1::e) in the IPv6 DA.
+#
+# In summary:
+#  hs-3->hs-4 |IPv6 DA=fcff:2::e|SRH SIDs=fcff:4::d46|IPv6|...| (i.c)
+#  hs-4->hs-3 |IPv6 DA=fcff:1::e|SRH SIDs=fcff:3::d46|IPv6|...| (i.d)
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+readonly ksft_skip=4
+
+readonly RDMSUFF="$(mktemp -u XXXXXXXX)"
+readonly VRF_TID=100
+readonly VRF_DEVNAME="vrf-${VRF_TID}"
+readonly RT2HS_DEVNAME="veth-t${VRF_TID}"
+readonly LOCALSID_TABLE_ID=90
+readonly IPv6_RT_NETWORK=fcf0:0
+readonly IPv6_HS_NETWORK=cafe
+readonly IPv4_HS_NETWORK=10.0.0
+readonly VPN_LOCATOR_SERVICE=fcff
+readonly END_FUNC=000e
+readonly DT46_FUNC=0d46
+
+PING_TIMEOUT_SEC=4
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+# IDs of routers and hosts are initialized during the setup of the testing
+# network
+ROUTERS=''
+HOSTS=''
+
+SETUP_ERR=1
+
+ret=${ksft_skip}
+nsuccess=0
+nfail=0
+
+log_test()
+{
+	local rc="$1"
+	local expected="$2"
+	local msg="$3"
+
+	if [ "${rc}" -eq "${expected}" ]; then
+		nsuccess=$((nsuccess+1))
+		printf "\n    TEST: %-60s  [ OK ]\n" "${msg}"
+	else
+		ret=1
+		nfail=$((nfail+1))
+		printf "\n    TEST: %-60s  [FAIL]\n" "${msg}"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+			echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	fi
+}
+
+print_log_test_results()
+{
+	printf "\nTests passed: %3d\n" "${nsuccess}"
+	printf "Tests failed: %3d\n"   "${nfail}"
+
+	# when a test fails, the value of 'ret' is set to 1 (error code).
+	# Conversely, when all tests are passed successfully, the 'ret' value
+	# is set to 0 (success code).
+	if [ "${ret}" -ne 1 ]; then
+		ret=0
+	fi
+}
+
+log_section()
+{
+	echo
+	echo "################################################################################"
+	echo "TEST SECTION: $*"
+	echo "################################################################################"
+}
+
+test_command_or_ksft_skip()
+{
+	local cmd="$1"
+
+	if [ ! -x "$(command -v "${cmd}")" ]; then
+		echo "SKIP: Could not run test without \"${cmd}\" tool";
+		exit "${ksft_skip}"
+	fi
+}
+
+get_nodename()
+{
+	local name="$1"
+
+	echo "${name}-${RDMSUFF}"
+}
+
+get_rtname()
+{
+	local rtid="$1"
+
+	get_nodename "rt-${rtid}"
+}
+
+get_hsname()
+{
+	local hsid="$1"
+
+	get_nodename "hs-${hsid}"
+}
+
+__create_namespace()
+{
+	local name="$1"
+
+	ip netns add "${name}"
+}
+
+create_router()
+{
+	local rtid="$1"
+	local nsname
+
+	nsname="$(get_rtname "${rtid}")"
+
+	__create_namespace "${nsname}"
+}
+
+create_host()
+{
+	local hsid="$1"
+	local nsname
+
+	nsname="$(get_hsname "${hsid}")"
+
+	__create_namespace "${nsname}"
+}
+
+cleanup()
+{
+	local nsname
+	local i
+
+	# destroy routers
+	for i in ${ROUTERS}; do
+		nsname="$(get_rtname "${i}")"
+
+		ip netns del "${nsname}" &>/dev/null || true
+	done
+
+	# destroy hosts
+	for i in ${HOSTS}; do
+		nsname="$(get_hsname "${i}")"
+
+		ip netns del "${nsname}" &>/dev/null || true
+	done
+
+	# check whether the setup phase was completed successfully or not. In
+	# case of an error during the setup phase of the testing environment,
+	# the selftest is considered as "skipped".
+	if [ "${SETUP_ERR}" -ne 0 ]; then
+		echo "SKIP: Setting up the testing environment failed"
+		exit "${ksft_skip}"
+	fi
+
+	exit "${ret}"
+}
+
+add_link_rt_pairs()
+{
+	local rt="$1"
+	local rt_neighs="$2"
+	local neigh
+	local nsname
+	local neigh_nsname
+
+	nsname="$(get_rtname "${rt}")"
+
+	for neigh in ${rt_neighs}; do
+		neigh_nsname="$(get_rtname "${neigh}")"
+
+		ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \
+			type veth peer name "veth-rt-${neigh}-${rt}" \
+			netns "${neigh_nsname}"
+	done
+}
+
+get_network_prefix()
+{
+	local rt="$1"
+	local neigh="$2"
+	local p="${rt}"
+	local q="${neigh}"
+
+	if [ "${p}" -gt "${q}" ]; then
+		p="${q}"; q="${rt}"
+	fi
+
+	echo "${IPv6_RT_NETWORK}:${p}:${q}"
+}
+
+# Setup the basic networking for the routers
+setup_rt_networking()
+{
+	local rt="$1"
+	local rt_neighs="$2"
+	local nsname
+	local net_prefix
+	local devname
+	local neigh
+
+	nsname="$(get_rtname "${rt}")"
+
+	for neigh in ${rt_neighs}; do
+		devname="veth-rt-${rt}-${neigh}"
+
+		net_prefix="$(get_network_prefix "${rt}" "${neigh}")"
+
+		ip -netns "${nsname}" addr \
+			add "${net_prefix}::${rt}/64" dev "${devname}" nodad
+
+		ip -netns "${nsname}" link set "${devname}" up
+	done
+
+	ip -netns "${nsname}" link set lo up
+
+	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
+	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
+	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1
+
+	ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0
+	ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0
+	ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1
+}
+
+# Setup local SIDs for an SRv6 router
+setup_rt_local_sids()
+{
+	local rt="$1"
+	local rt_neighs="$2"
+	local net_prefix
+	local devname
+	local nsname
+	local neigh
+
+	nsname="$(get_rtname "${rt}")"
+
+	for neigh in ${rt_neighs}; do
+		devname="veth-rt-${rt}-${neigh}"
+
+		net_prefix="$(get_network_prefix "${rt}" "${neigh}")"
+
+		# set underlay network routes for SIDs reachability
+		ip -netns "${nsname}" -6 route \
+			add "${VPN_LOCATOR_SERVICE}:${neigh}::/32" \
+			table "${LOCALSID_TABLE_ID}" \
+			via "${net_prefix}::${neigh}" dev "${devname}"
+	done
+
+	# Local End behavior (note that "dev" is dummy and the VRF is chosen
+	# for the sake of simplicity).
+	ip -netns "${nsname}" -6 route \
+		add "${VPN_LOCATOR_SERVICE}:${rt}::${END_FUNC}" \
+		table "${LOCALSID_TABLE_ID}" \
+		encap seg6local action End dev "${VRF_DEVNAME}"
+
+	# Local End.DT46 behavior
+	ip -netns "${nsname}" -6 route \
+		add "${VPN_LOCATOR_SERVICE}:${rt}::${DT46_FUNC}" \
+		table "${LOCALSID_TABLE_ID}" \
+		encap seg6local action End.DT46 vrftable "${VRF_TID}" \
+		dev "${VRF_DEVNAME}"
+
+	# all SIDs for VPNs start with a common locator. Routes and SRv6
+	# Endpoint behavior instaces are grouped together in the 'localsid'
+	# table.
+	ip -netns "${nsname}" -6 rule \
+		add to "${VPN_LOCATOR_SERVICE}::/16" \
+		lookup "${LOCALSID_TABLE_ID}" prio 999
+
+	# set default routes to unreachable for both ipv4 and ipv6
+	ip -netns "${nsname}" -6 route \
+		add unreachable default metric 4278198272 \
+		vrf "${VRF_DEVNAME}"
+
+	ip -netns "${nsname}" -4 route \
+		add unreachable default metric 4278198272 \
+		vrf "${VRF_DEVNAME}"
+}
+
+# build and install the SRv6 policy into the ingress SRv6 router.
+# args:
+#  $1 - destination host (i.e. cafe::x host)
+#  $2 - SRv6 router configured for enforcing the SRv6 Policy
+#  $3 - SRv6 routers configured for steering traffic (End behaviors)
+#  $4 - SRv6 router configured for removing the SRv6 Policy (router connected
+#       to the destination host)
+#  $5 - encap mode (full or red)
+#  $6 - traffic type (IPv6 or IPv4)
+__setup_rt_policy()
+{
+	local dst="$1"
+	local encap_rt="$2"
+	local end_rts="$3"
+	local dec_rt="$4"
+	local mode="$5"
+	local traffic="$6"
+	local nsname
+	local policy=''
+	local n
+
+	nsname="$(get_rtname "${encap_rt}")"
+
+	for n in ${end_rts}; do
+		policy="${policy}${VPN_LOCATOR_SERVICE}:${n}::${END_FUNC},"
+	done
+
+	policy="${policy}${VPN_LOCATOR_SERVICE}:${dec_rt}::${DT46_FUNC}"
+
+	# add SRv6 policy to incoming traffic sent by connected hosts
+	if [ "${traffic}" -eq 6 ]; then
+		ip -netns "${nsname}" -6 route \
+			add "${IPv6_HS_NETWORK}::${dst}" vrf "${VRF_DEVNAME}" \
+			encap seg6 mode "${mode}" segs "${policy}" \
+			dev "${VRF_DEVNAME}"
+
+		ip -netns "${nsname}" -6 neigh \
+			add proxy "${IPv6_HS_NETWORK}::${dst}" \
+			dev "${RT2HS_DEVNAME}"
+	else
+		# "dev" must be different from the one where the packet is
+		# received, otherwise the proxy arp does not work.
+		ip -netns "${nsname}" -4 route \
+			add "${IPv4_HS_NETWORK}.${dst}" vrf "${VRF_DEVNAME}" \
+			encap seg6 mode "${mode}" segs "${policy}" \
+			dev "${VRF_DEVNAME}"
+	fi
+}
+
+# see __setup_rt_policy
+setup_rt_policy_ipv6()
+{
+	__setup_rt_policy "$1" "$2" "$3" "$4" "$5" 6
+}
+
+#see __setup_rt_policy
+setup_rt_policy_ipv4()
+{
+	__setup_rt_policy "$1" "$2" "$3" "$4" "$5" 4
+}
+
+setup_hs()
+{
+	local hs="$1"
+	local rt="$2"
+	local hsname
+	local rtname
+
+	hsname="$(get_hsname "${hs}")"
+	rtname="$(get_rtname "${rt}")"
+
+	ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
+	ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+	ip -netns "${hsname}" link add veth0 type veth \
+		peer name "${RT2HS_DEVNAME}" netns "${rtname}"
+
+	ip -netns "${hsname}" addr \
+		add "${IPv6_HS_NETWORK}::${hs}/64" dev veth0 nodad
+	ip -netns "${hsname}" addr add "${IPv4_HS_NETWORK}.${hs}/24" dev veth0
+
+	ip -netns "${hsname}" link set veth0 up
+	ip -netns "${hsname}" link set lo up
+
+	# configure the VRF on the router which is directly connected to the
+	# source host.
+	ip -netns "${rtname}" link \
+		add "${VRF_DEVNAME}" type vrf table "${VRF_TID}"
+	ip -netns "${rtname}" link set "${VRF_DEVNAME}" up
+
+	# enslave the veth interface connecting the router with the host to the
+	# VRF in the access router
+	ip -netns "${rtname}" link \
+		set "${RT2HS_DEVNAME}" master "${VRF_DEVNAME}"
+
+	ip -netns "${rtname}" addr \
+		add "${IPv6_HS_NETWORK}::254/64" dev "${RT2HS_DEVNAME}" nodad
+	ip -netns "${rtname}" addr \
+		add "${IPv4_HS_NETWORK}.254/24" dev "${RT2HS_DEVNAME}"
+
+	ip -netns "${rtname}" link set "${RT2HS_DEVNAME}" up
+
+	ip netns exec "${rtname}" \
+		sysctl -wq net.ipv6.conf."${RT2HS_DEVNAME}".proxy_ndp=1
+	ip netns exec "${rtname}" \
+		sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1
+
+	# disable the rp_filter otherwise the kernel gets confused about how
+	# to route decap ipv4 packets.
+	ip netns exec "${rtname}" \
+		sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0
+
+	ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
+}
+
+setup()
+{
+	local i
+
+	# create routers
+	ROUTERS="1 2 3 4"; readonly ROUTERS
+	for i in ${ROUTERS}; do
+		create_router "${i}"
+	done
+
+	# create hosts
+	HOSTS="1 2 3 4"; readonly HOSTS
+	for i in ${HOSTS}; do
+		create_host "${i}"
+	done
+
+	# set up the links for connecting routers
+	add_link_rt_pairs 1 "2 3 4"
+	add_link_rt_pairs 2 "3 4"
+	add_link_rt_pairs 3 "4"
+
+	# set up the basic connectivity of routers and routes required for
+	# reachability of SIDs.
+	setup_rt_networking 1 "2 3 4"
+	setup_rt_networking 2 "1 3 4"
+	setup_rt_networking 3 "1 2 4"
+	setup_rt_networking 4 "1 2 3"
+
+	# set up the hosts connected to routers
+	setup_hs 1 1
+	setup_hs 2 2
+	setup_hs 3 3
+	setup_hs 4 4
+
+	# set up default SRv6 Endpoints (i.e. SRv6 End and SRv6 End.DT46)
+	setup_rt_local_sids 1 "2 3 4"
+	setup_rt_local_sids 2 "1 3 4"
+	setup_rt_local_sids 3 "1 2 4"
+	setup_rt_local_sids 4 "1 2 3"
+
+	# set up SRv6 policies
+
+	# create an IPv6 VPN between hosts hs-1 and hs-2.
+	# the network path between hs-1 and hs-2 traverses several routers
+	# depending on the direction of traffic.
+	#
+	# Direction hs-1 -> hs-2 (H.Encaps.Red)
+	#  - rt-3,rt-4 (SRv6 End behaviors)
+	#  - rt-2 (SRv6 End.DT46 behavior)
+	#
+	# Direction hs-2 -> hs-1 (H.Encaps.Red)
+	#  - rt-1 (SRv6 End.DT46 behavior)
+	setup_rt_policy_ipv6 2 1 "3 4" 2 encap.red
+	setup_rt_policy_ipv6 1 2 "" 1 encap.red
+
+	# create an IPv4 VPN between hosts hs-1 and hs-2
+	# the network path between hs-1 and hs-2 traverses several routers
+	# depending on the direction of traffic.
+	#
+	# Direction hs-1 -> hs-2 (H.Encaps.Red)
+	# - rt-2 (SRv6 End.DT46 behavior)
+	#
+	# Direction hs-2 -> hs-1 (H.Encaps.Red)
+	#  - rt-4,rt-3 (SRv6 End behaviors)
+	#  - rt-1 (SRv6 End.DT46 behavior)
+	setup_rt_policy_ipv4 2 1 "" 2 encap.red
+	setup_rt_policy_ipv4 1 2 "4 3" 1 encap.red
+
+	# create an IPv6 VPN between hosts hs-3 and hs-4
+	# the network path between hs-3 and hs-4 traverses several routers
+	# depending on the direction of traffic.
+	#
+	# Direction hs-3 -> hs-4 (H.Encaps.Red)
+	# - rt-2 (SRv6 End Behavior)
+	# - rt-4 (SRv6 End.DT46 behavior)
+	#
+	# Direction hs-4 -> hs-3 (H.Encaps.Red)
+	#  - rt-1 (SRv6 End behavior)
+	#  - rt-3 (SRv6 End.DT46 behavior)
+	setup_rt_policy_ipv6 4 3 "2" 4 encap.red
+	setup_rt_policy_ipv6 3 4 "1" 3 encap.red
+
+	# testing environment was set up successfully
+	SETUP_ERR=0
+}
+
+check_rt_connectivity()
+{
+	local rtsrc="$1"
+	local rtdst="$2"
+	local prefix
+	local rtsrc_nsname
+
+	rtsrc_nsname="$(get_rtname "${rtsrc}")"
+
+	prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")"
+
+	ip netns exec "${rtsrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
+		"${prefix}::${rtdst}" >/dev/null 2>&1
+}
+
+check_and_log_rt_connectivity()
+{
+	local rtsrc="$1"
+	local rtdst="$2"
+
+	check_rt_connectivity "${rtsrc}" "${rtdst}"
+	log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}"
+}
+
+check_hs_ipv6_connectivity()
+{
+	local hssrc="$1"
+	local hsdst="$2"
+	local hssrc_nsname
+
+	hssrc_nsname="$(get_hsname "${hssrc}")"
+
+	ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
+		"${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1
+}
+
+check_hs_ipv4_connectivity()
+{
+	local hssrc="$1"
+	local hsdst="$2"
+	local hssrc_nsname
+
+	hssrc_nsname="$(get_hsname "${hssrc}")"
+
+	ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
+		"${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1
+}
+
+check_and_log_hs2gw_connectivity()
+{
+	local hssrc="$1"
+
+	check_hs_ipv6_connectivity "${hssrc}" 254
+	log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> gw"
+
+	check_hs_ipv4_connectivity "${hssrc}" 254
+	log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> gw"
+}
+
+check_and_log_hs_ipv6_connectivity()
+{
+	local hssrc="$1"
+	local hsdst="$2"
+
+	check_hs_ipv6_connectivity "${hssrc}" "${hsdst}"
+	log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}"
+}
+
+check_and_log_hs_ipv4_connectivity()
+{
+	local hssrc="$1"
+	local hsdst="$2"
+
+	check_hs_ipv4_connectivity "${hssrc}" "${hsdst}"
+	log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}"
+}
+
+check_and_log_hs_connectivity()
+{
+	local hssrc="$1"
+	local hsdst="$2"
+
+	check_and_log_hs_ipv4_connectivity "${hssrc}" "${hsdst}"
+	check_and_log_hs_ipv6_connectivity "${hssrc}" "${hsdst}"
+}
+
+check_and_log_hs_ipv6_isolation()
+{
+	local hssrc="$1"
+	local hsdst="$2"
+
+	# in this case, the connectivity test must fail
+	check_hs_ipv6_connectivity "${hssrc}" "${hsdst}"
+	log_test $? 1 "IPv6 Hosts isolation: hs-${hssrc} -X-> hs-${hsdst}"
+}
+
+check_and_log_hs_ipv4_isolation()
+{
+	local hssrc="$1"
+	local hsdst="$2"
+
+	# in this case, the connectivity test must fail
+	check_hs_ipv4_connectivity "${hssrc}" "${hsdst}"
+	log_test $? 1 "IPv4 Hosts isolation: hs-${hssrc} -X-> hs-${hsdst}"
+}
+
+check_and_log_hs_isolation()
+{
+	local hssrc="$1"
+	local hsdst="$2"
+
+	check_and_log_hs_ipv6_isolation "${hssrc}" "${hsdst}"
+	check_and_log_hs_ipv4_isolation "${hssrc}" "${hsdst}"
+}
+
+router_tests()
+{
+	local i
+	local j
+
+	log_section "IPv6 routers connectivity test"
+
+	for i in ${ROUTERS}; do
+		for j in ${ROUTERS}; do
+			if [ "${i}" -eq "${j}" ]; then
+				continue
+			fi
+
+			check_and_log_rt_connectivity "${i}" "${j}"
+		done
+	done
+}
+
+host2gateway_tests()
+{
+	local hs
+
+	log_section "IPv4/IPv6 connectivity test among hosts and gateways"
+
+	for hs in ${HOSTS}; do
+		check_and_log_hs2gw_connectivity "${hs}"
+	done
+}
+
+host_vpn_tests()
+{
+	log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv4/IPv6)"
+
+	check_and_log_hs_connectivity 1 2
+	check_and_log_hs_connectivity 2 1
+
+	log_section "SRv6 VPN connectivity test hosts (h3 <-> h4, IPv6 only)"
+
+	check_and_log_hs_ipv6_connectivity 3 4
+	check_and_log_hs_ipv6_connectivity 4 3
+}
+
+host_vpn_isolation_tests()
+{
+	local l1="1 2"
+	local l2="3 4"
+	local tmp
+	local i
+	local j
+	local k
+
+	log_section "SRv6 VPN isolation test among hosts"
+
+	for k in 0 1; do
+		for i in ${l1}; do
+			for j in ${l2}; do
+				check_and_log_hs_isolation "${i}" "${j}"
+			done
+		done
+
+		# let us test the reverse path
+		tmp="${l1}"; l1="${l2}"; l2="${tmp}"
+	done
+
+	log_section "SRv6 VPN isolation test among hosts (h2 <-> h4, IPv4 only)"
+
+	check_and_log_hs_ipv4_isolation 2 4
+	check_and_log_hs_ipv4_isolation 4 2
+}
+
+test_iproute2_supp_or_ksft_skip()
+{
+	if ! ip route help 2>&1 | grep -qo "encap.red"; then
+		echo "SKIP: Missing SRv6 encap.red support in iproute2"
+		exit "${ksft_skip}"
+	fi
+}
+
+test_vrf_or_ksft_skip()
+{
+	modprobe vrf &>/dev/null || true
+	if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
+		echo "SKIP: vrf sysctl does not exist"
+		exit "${ksft_skip}"
+	fi
+}
+
+if [ "$(id -u)" -ne 0 ]; then
+	echo "SKIP: Need root privileges"
+	exit "${ksft_skip}"
+fi
+
+# required programs to carry out this selftest
+test_command_or_ksft_skip ip
+test_command_or_ksft_skip ping
+test_command_or_ksft_skip sysctl
+test_command_or_ksft_skip grep
+
+test_iproute2_supp_or_ksft_skip
+test_vrf_or_ksft_skip
+
+set -e
+trap cleanup EXIT
+
+setup
+set +e
+
+router_tests
+host2gateway_tests
+host_vpn_tests
+host_vpn_isolation_tests
+
+print_log_test_results
diff --git a/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh
new file mode 100755
index 000000000000..cb4177d41b21
--- /dev/null
+++ b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh
@@ -0,0 +1,821 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# author: Andrea Mayer <andrea.mayer@uniroma2.it>
+#
+# This script is designed for testing the SRv6 H.L2Encaps.Red behavior.
+#
+# Below is depicted the IPv6 network of an operator which offers L2 VPN
+# services to hosts, enabling them to communicate with each other.
+# In this example, hosts hs-1 and hs-2 are connected through an L2 VPN service.
+# Currently, the SRv6 subsystem in Linux allows hosts hs-1 and hs-2 to exchange
+# full L2 frames as long as they carry IPv4/IPv6.
+#
+# Routers rt-1,rt-2,rt-3 and rt-4 implement L2 VPN services
+# leveraging the SRv6 architecture. The key components for such VPNs are:
+#
+#   i) The SRv6 H.L2Encaps.Red behavior applies SRv6 Policies on traffic
+#      received by connected hosts, initiating the VPN tunnel. Such a behavior
+#      is an optimization of the SRv6 H.L2Encap aiming to reduce the
+#      length of the SID List carried in the pushed SRH. Specifically, the
+#      H.L2Encaps.Red removes the first SID contained in the SID List (i.e. SRv6
+#      Policy) by storing it into the IPv6 Destination Address. When a SRv6
+#      Policy is made of only one SID, the SRv6 H.L2Encaps.Red behavior omits
+#      the SRH at all and pushes that SID directly into the IPv6 DA;
+#
+#  ii) The SRv6 End behavior advances the active SID in the SID List
+#      carried by the SRH;
+#
+# iii) The SRv6 End.DX2 behavior is used for removing the SRv6 Policy
+#      and, thus, it terminates the VPN tunnel. The decapsulated L2 frame is
+#      sent over the interface connected with the destination host.
+#
+#               cafe::1                      cafe::2
+#              10.0.0.1                     10.0.0.2
+#             +--------+                   +--------+
+#             |        |                   |        |
+#             |  hs-1  |                   |  hs-2  |
+#             |        |                   |        |
+#             +---+----+                   +--- +---+
+#    cafe::/64    |                             |      cafe::/64
+#  10.0.0.0/24    |                             |    10.0.0.0/24
+#             +---+----+                   +----+---+
+#             |        |  fcf0:0:1:2::/64  |        |
+#             |  rt-1  +-------------------+  rt-2  |
+#             |        |                   |        |
+#             +---+----+                   +----+---+
+#                 |      .               .      |
+#                 |  fcf0:0:1:3::/64   .        |
+#                 |          .       .          |
+#                 |            .   .            |
+# fcf0:0:1:4::/64 |              .              | fcf0:0:2:3::/64
+#                 |            .   .            |
+#                 |          .       .          |
+#                 |  fcf0:0:2:4::/64   .        |
+#                 |      .               .      |
+#             +---+----+                   +----+---+
+#             |        |                   |        |
+#             |  rt-4  +-------------------+  rt-3  |
+#             |        |  fcf0:0:3:4::/64  |        |
+#             +---+----+                   +----+---+
+#
+#
+# Every fcf0:0:x:y::/64 network interconnects the SRv6 routers rt-x with rt-y
+# in the IPv6 operator network.
+#
+# Local SID table
+# ===============
+#
+# Each SRv6 router is configured with a Local SID table in which SIDs are
+# stored. Considering the given SRv6 router rt-x, at least two SIDs are
+# configured in the Local SID table:
+#
+#   Local SID table for SRv6 router rt-x
+#   +----------------------------------------------------------+
+#   |fcff:x::e is associated with the SRv6 End behavior        |
+#   |fcff:x::d2 is associated with the SRv6 End.DX2 behavior   |
+#   +----------------------------------------------------------+
+#
+# The fcff::/16 prefix is reserved by the operator for implementing SRv6 VPN
+# services. Reachability of SIDs is ensured by proper configuration of the IPv6
+# operator's network and SRv6 routers.
+#
+# SRv6 Policies
+# =============
+#
+# An SRv6 ingress router applies SRv6 policies to the traffic received from a
+# connected host. SRv6 policy enforcement consists of encapsulating the
+# received traffic into a new IPv6 packet with a given SID List contained in
+# the SRH.
+#
+# L2 VPN between hs-1 and hs-2
+# ----------------------------
+#
+# Hosts hs-1 and hs-2 are connected using a dedicated L2 VPN.
+# Specifically, packets generated from hs-1 and directed towards hs-2 are
+# handled by rt-1 which applies the following SRv6 Policies:
+#
+#   i.a) L2 traffic, SID List=fcff:2::d2
+#
+# Policy (i.a) steers tunneled L2 traffic through SRv6 router rt-2.
+# The H.L2Encaps.Red omits the presence of SRH at all, since the SID List
+# consists of only one SID (fcff:2::d2) that can be stored directly in the IPv6
+# DA.
+#
+# On the reverse path (i.e. from hs-2 to hs-1), rt-2 applies the following
+# policies:
+#
+#   i.b) L2 traffic, SID List=fcff:4::e,fcff:3::e,fcff:1::d2
+#
+# Policy (i.b) steers tunneled L2 traffic through the SRv6 routers
+# rt-4,rt-3,rt2. The H.L2Encaps.Red reduces the SID List in the SRH by removing
+# the first SID (fcff:4::e) and pushing it into the IPv6 DA.
+#
+# In summary:
+#  hs-1->hs-2 |IPv6 DA=fcff:2::d2|eth|...|                              (i.a)
+#  hs-2->hs-1 |IPv6 DA=fcff:4::e|SRH SIDs=fcff:3::e,fcff:1::d2|eth|...| (i.b)
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+readonly ksft_skip=4
+
+readonly RDMSUFF="$(mktemp -u XXXXXXXX)"
+readonly DUMMY_DEVNAME="dum0"
+readonly RT2HS_DEVNAME="veth-hs"
+readonly HS_VETH_NAME="veth0"
+readonly LOCALSID_TABLE_ID=90
+readonly IPv6_RT_NETWORK=fcf0:0
+readonly IPv6_HS_NETWORK=cafe
+readonly IPv4_HS_NETWORK=10.0.0
+readonly VPN_LOCATOR_SERVICE=fcff
+readonly MAC_PREFIX=00:00:00:c0:01
+readonly END_FUNC=000e
+readonly DX2_FUNC=00d2
+
+PING_TIMEOUT_SEC=4
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+# IDs of routers and hosts are initialized during the setup of the testing
+# network
+ROUTERS=''
+HOSTS=''
+
+SETUP_ERR=1
+
+ret=${ksft_skip}
+nsuccess=0
+nfail=0
+
+log_test()
+{
+	local rc="$1"
+	local expected="$2"
+	local msg="$3"
+
+	if [ "${rc}" -eq "${expected}" ]; then
+		nsuccess=$((nsuccess+1))
+		printf "\n    TEST: %-60s  [ OK ]\n" "${msg}"
+	else
+		ret=1
+		nfail=$((nfail+1))
+		printf "\n    TEST: %-60s  [FAIL]\n" "${msg}"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+			echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	fi
+}
+
+print_log_test_results()
+{
+	printf "\nTests passed: %3d\n" "${nsuccess}"
+	printf "Tests failed: %3d\n"   "${nfail}"
+
+	# when a test fails, the value of 'ret' is set to 1 (error code).
+	# Conversely, when all tests are passed successfully, the 'ret' value
+	# is set to 0 (success code).
+	if [ "${ret}" -ne 1 ]; then
+		ret=0
+	fi
+}
+
+log_section()
+{
+	echo
+	echo "################################################################################"
+	echo "TEST SECTION: $*"
+	echo "################################################################################"
+}
+
+test_command_or_ksft_skip()
+{
+	local cmd="$1"
+
+	if [ ! -x "$(command -v "${cmd}")" ]; then
+		echo "SKIP: Could not run test without \"${cmd}\" tool";
+		exit "${ksft_skip}"
+	fi
+}
+
+get_nodename()
+{
+	local name="$1"
+
+	echo "${name}-${RDMSUFF}"
+}
+
+get_rtname()
+{
+	local rtid="$1"
+
+	get_nodename "rt-${rtid}"
+}
+
+get_hsname()
+{
+	local hsid="$1"
+
+	get_nodename "hs-${hsid}"
+}
+
+__create_namespace()
+{
+	local name="$1"
+
+	ip netns add "${name}"
+}
+
+create_router()
+{
+	local rtid="$1"
+	local nsname
+
+	nsname="$(get_rtname "${rtid}")"
+
+	__create_namespace "${nsname}"
+}
+
+create_host()
+{
+	local hsid="$1"
+	local nsname
+
+	nsname="$(get_hsname "${hsid}")"
+
+	__create_namespace "${nsname}"
+}
+
+cleanup()
+{
+	local nsname
+	local i
+
+	# destroy routers
+	for i in ${ROUTERS}; do
+		nsname="$(get_rtname "${i}")"
+
+		ip netns del "${nsname}" &>/dev/null || true
+	done
+
+	# destroy hosts
+	for i in ${HOSTS}; do
+		nsname="$(get_hsname "${i}")"
+
+		ip netns del "${nsname}" &>/dev/null || true
+	done
+
+	# check whether the setup phase was completed successfully or not. In
+	# case of an error during the setup phase of the testing environment,
+	# the selftest is considered as "skipped".
+	if [ "${SETUP_ERR}" -ne 0 ]; then
+		echo "SKIP: Setting up the testing environment failed"
+		exit "${ksft_skip}"
+	fi
+
+	exit "${ret}"
+}
+
+add_link_rt_pairs()
+{
+	local rt="$1"
+	local rt_neighs="$2"
+	local neigh
+	local nsname
+	local neigh_nsname
+
+	nsname="$(get_rtname "${rt}")"
+
+	for neigh in ${rt_neighs}; do
+		neigh_nsname="$(get_rtname "${neigh}")"
+
+		ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \
+			type veth peer name "veth-rt-${neigh}-${rt}" \
+			netns "${neigh_nsname}"
+	done
+}
+
+get_network_prefix()
+{
+	local rt="$1"
+	local neigh="$2"
+	local p="${rt}"
+	local q="${neigh}"
+
+	if [ "${p}" -gt "${q}" ]; then
+		p="${q}"; q="${rt}"
+	fi
+
+	echo "${IPv6_RT_NETWORK}:${p}:${q}"
+}
+
+# Setup the basic networking for the routers
+setup_rt_networking()
+{
+	local rt="$1"
+	local rt_neighs="$2"
+	local nsname
+	local net_prefix
+	local devname
+	local neigh
+
+	nsname="$(get_rtname "${rt}")"
+
+	for neigh in ${rt_neighs}; do
+		devname="veth-rt-${rt}-${neigh}"
+
+		net_prefix="$(get_network_prefix "${rt}" "${neigh}")"
+
+		ip -netns "${nsname}" addr \
+			add "${net_prefix}::${rt}/64" dev "${devname}" nodad
+
+		ip -netns "${nsname}" link set "${devname}" up
+	done
+
+	ip -netns "${nsname}" link add "${DUMMY_DEVNAME}" type dummy
+
+	ip -netns "${nsname}" link set "${DUMMY_DEVNAME}" up
+	ip -netns "${nsname}" link set lo up
+
+	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
+	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
+	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1
+
+	ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0
+	ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0
+	ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1
+}
+
+# Setup local SIDs for an SRv6 router
+setup_rt_local_sids()
+{
+	local rt="$1"
+	local rt_neighs="$2"
+	local net_prefix
+	local devname
+	local nsname
+	local neigh
+
+	nsname="$(get_rtname "${rt}")"
+
+	for neigh in ${rt_neighs}; do
+		devname="veth-rt-${rt}-${neigh}"
+
+		net_prefix="$(get_network_prefix "${rt}" "${neigh}")"
+
+		# set underlay network routes for SIDs reachability
+		ip -netns "${nsname}" -6 route \
+			add "${VPN_LOCATOR_SERVICE}:${neigh}::/32" \
+			table "${LOCALSID_TABLE_ID}" \
+			via "${net_prefix}::${neigh}" dev "${devname}"
+	done
+
+	# Local End behavior (note that dev "${DUMMY_DEVNAME}" is a dummy
+	# interface)
+	ip -netns "${nsname}" -6 route \
+		add "${VPN_LOCATOR_SERVICE}:${rt}::${END_FUNC}" \
+		table "${LOCALSID_TABLE_ID}" \
+		encap seg6local action End dev "${DUMMY_DEVNAME}"
+
+	# all SIDs for VPNs start with a common locator. Routes and SRv6
+	# Endpoint behaviors instaces are grouped together in the 'localsid'
+	# table.
+	ip -netns "${nsname}" -6 rule add \
+		to "${VPN_LOCATOR_SERVICE}::/16" \
+		lookup "${LOCALSID_TABLE_ID}" prio 999
+}
+
+# build and install the SRv6 policy into the ingress SRv6 router.
+# args:
+#  $1 - destination host (i.e. cafe::x host)
+#  $2 - SRv6 router configured for enforcing the SRv6 Policy
+#  $3 - SRv6 routers configured for steering traffic (End behaviors)
+#  $4 - SRv6 router configured for removing the SRv6 Policy (router connected
+#       to the destination host)
+#  $5 - encap mode (full or red)
+#  $6 - traffic type (IPv6 or IPv4)
+__setup_rt_policy()
+{
+	local dst="$1"
+	local encap_rt="$2"
+	local end_rts="$3"
+	local dec_rt="$4"
+	local mode="$5"
+	local traffic="$6"
+	local nsname
+	local policy=''
+	local n
+
+	nsname="$(get_rtname "${encap_rt}")"
+
+	for n in ${end_rts}; do
+		policy="${policy}${VPN_LOCATOR_SERVICE}:${n}::${END_FUNC},"
+	done
+
+	policy="${policy}${VPN_LOCATOR_SERVICE}:${dec_rt}::${DX2_FUNC}"
+
+	# add SRv6 policy to incoming traffic sent by connected hosts
+	if [ "${traffic}" -eq 6 ]; then
+		ip -netns "${nsname}" -6 route \
+			add "${IPv6_HS_NETWORK}::${dst}" \
+			encap seg6 mode "${mode}" segs "${policy}" \
+			dev dum0
+	else
+		ip -netns "${nsname}" -4 route \
+			add "${IPv4_HS_NETWORK}.${dst}" \
+			encap seg6 mode "${mode}" segs "${policy}" \
+			dev dum0
+	fi
+}
+
+# see __setup_rt_policy
+setup_rt_policy_ipv6()
+{
+	__setup_rt_policy "$1" "$2" "$3" "$4" "$5" 6
+}
+
+#see __setup_rt_policy
+setup_rt_policy_ipv4()
+{
+	__setup_rt_policy "$1" "$2" "$3" "$4" "$5" 4
+}
+
+setup_decap()
+{
+	local rt="$1"
+	local nsname
+
+	nsname="$(get_rtname "${rt}")"
+
+	# Local End.DX2 behavior
+	ip -netns "${nsname}" -6 route \
+		add "${VPN_LOCATOR_SERVICE}:${rt}::${DX2_FUNC}" \
+		table "${LOCALSID_TABLE_ID}" \
+		encap seg6local action End.DX2 oif "${RT2HS_DEVNAME}" \
+		dev "${RT2HS_DEVNAME}"
+}
+
+setup_hs()
+{
+	local hs="$1"
+	local rt="$2"
+	local hsname
+	local rtname
+
+	hsname="$(get_hsname "${hs}")"
+	rtname="$(get_rtname "${rt}")"
+
+	ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
+	ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+	ip -netns "${hsname}" link add "${HS_VETH_NAME}" type veth \
+		peer name "${RT2HS_DEVNAME}" netns "${rtname}"
+
+	ip -netns "${hsname}" addr add "${IPv6_HS_NETWORK}::${hs}/64" \
+		dev "${HS_VETH_NAME}" nodad
+	ip -netns "${hsname}" addr add "${IPv4_HS_NETWORK}.${hs}/24" \
+		dev "${HS_VETH_NAME}"
+
+	ip -netns "${hsname}" link set "${HS_VETH_NAME}" up
+	ip -netns "${hsname}" link set lo up
+
+	ip -netns "${rtname}" addr add "${IPv6_HS_NETWORK}::254/64" \
+		dev "${RT2HS_DEVNAME}" nodad
+	ip -netns "${rtname}" addr \
+		add "${IPv4_HS_NETWORK}.254/24" dev "${RT2HS_DEVNAME}"
+
+	ip -netns "${rtname}" link set "${RT2HS_DEVNAME}" up
+
+	# disable the rp_filter otherwise the kernel gets confused about how
+	# to route decap ipv4 packets.
+	ip netns exec "${rtname}" \
+		sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0
+}
+
+# set an auto-generated mac address
+# args:
+#  $1 - name of the node (e.g.: hs-1, rt-3, etc)
+#  $2 - id of the node (e.g.: 1 for hs-1, 3 for rt-3, etc)
+#  $3 - host part of the IPv6 network address
+#  $4 - name of the network interface to which the generated mac address must
+#       be set.
+set_mac_address()
+{
+	local nodename="$1"
+	local nodeid="$2"
+	local host="$3"
+	local ifname="$4"
+	local nsname
+
+	nsname=$(get_nodename "${nodename}")
+
+	ip -netns "${nsname}" link set dev "${ifname}" down
+
+	ip -netns "${nsname}" link set address "${MAC_PREFIX}:${nodeid}" \
+		dev "${ifname}"
+
+	# the IPv6 address must be set once again after the MAC address has
+	# been changed.
+	ip -netns "${nsname}" addr add "${IPv6_HS_NETWORK}::${host}/64" \
+		dev "${ifname}" nodad
+
+	ip -netns "${nsname}" link set dev "${ifname}" up
+}
+
+set_host_l2peer()
+{
+	local hssrc="$1"
+	local hsdst="$2"
+	local ipprefix="$3"
+	local proto="$4"
+	local hssrc_name
+	local ipaddr
+
+	hssrc_name="$(get_hsname "${hssrc}")"
+
+	if [ "${proto}" -eq 6 ]; then
+		ipaddr="${ipprefix}::${hsdst}"
+	else
+		ipaddr="${ipprefix}.${hsdst}"
+	fi
+
+	ip -netns "${hssrc_name}" route add "${ipaddr}" dev "${HS_VETH_NAME}"
+
+	ip -netns "${hssrc_name}" neigh \
+		add "${ipaddr}" lladdr "${MAC_PREFIX}:${hsdst}" \
+		dev "${HS_VETH_NAME}"
+}
+
+# setup an SRv6 L2 VPN between host hs-x and hs-y (currently, the SRv6
+# subsystem only supports L2 frames whose layer-3 is IPv4/IPv6).
+# args:
+#  $1 - source host
+#  $2 - SRv6 routers configured for steering tunneled traffic
+#  $3 - destination host
+setup_l2vpn()
+{
+	local hssrc="$1"
+	local end_rts="$2"
+	local hsdst="$3"
+	local rtsrc="${hssrc}"
+	local rtdst="${hsdst}"
+
+	# set fixed mac for source node and the neigh MAC address
+	set_mac_address "hs-${hssrc}" "${hssrc}" "${hssrc}" "${HS_VETH_NAME}"
+	set_host_l2peer "${hssrc}" "${hsdst}" "${IPv6_HS_NETWORK}" 6
+	set_host_l2peer "${hssrc}" "${hsdst}" "${IPv4_HS_NETWORK}" 4
+
+	# we have to set the mac address of the veth-host (on ingress router)
+	# to the mac address of the remote peer (L2 VPN destination host).
+	# Otherwise, traffic coming from the source host is dropped at the
+	# ingress router.
+	set_mac_address "rt-${rtsrc}" "${hsdst}" 254 "${RT2HS_DEVNAME}"
+
+	# set the SRv6 Policies at the ingress router
+	setup_rt_policy_ipv6 "${hsdst}" "${rtsrc}" "${end_rts}" "${rtdst}" \
+		l2encap.red 6
+	setup_rt_policy_ipv4 "${hsdst}" "${rtsrc}" "${end_rts}" "${rtdst}" \
+		l2encap.red 4
+
+	# set the decap behavior
+	setup_decap "${rtsrc}"
+}
+
+setup()
+{
+	local i
+
+	# create routers
+	ROUTERS="1 2 3 4"; readonly ROUTERS
+	for i in ${ROUTERS}; do
+		create_router "${i}"
+	done
+
+	# create hosts
+	HOSTS="1 2"; readonly HOSTS
+	for i in ${HOSTS}; do
+		create_host "${i}"
+	done
+
+	# set up the links for connecting routers
+	add_link_rt_pairs 1 "2 3 4"
+	add_link_rt_pairs 2 "3 4"
+	add_link_rt_pairs 3 "4"
+
+	# set up the basic connectivity of routers and routes required for
+	# reachability of SIDs.
+	setup_rt_networking 1 "2 3 4"
+	setup_rt_networking 2 "1 3 4"
+	setup_rt_networking 3 "1 2 4"
+	setup_rt_networking 4 "1 2 3"
+
+	# set up the hosts connected to routers
+	setup_hs 1 1
+	setup_hs 2 2
+
+	# set up default SRv6 Endpoints (i.e. SRv6 End and SRv6 End.DX2)
+	setup_rt_local_sids 1 "2 3 4"
+	setup_rt_local_sids 2 "1 3 4"
+	setup_rt_local_sids 3 "1 2 4"
+	setup_rt_local_sids 4 "1 2 3"
+
+	# create a L2 VPN between hs-1 and hs-2.
+	# NB: currently, H.L2Encap* enables tunneling of L2 frames whose
+	# layer-3 is IPv4/IPv6.
+	#
+	# the network path between hs-1 and hs-2 traverses several routers
+	# depending on the direction of traffic.
+	#
+	# Direction hs-1 -> hs-2 (H.L2Encaps.Red)
+	# - rt-2 (SRv6 End.DX2 behavior)
+	#
+	# Direction hs-2 -> hs-1 (H.L2Encaps.Red)
+	#  - rt-4,rt-3 (SRv6 End behaviors)
+	#  - rt-1 (SRv6 End.DX2 behavior)
+	setup_l2vpn 1 "" 2
+	setup_l2vpn 2 "4 3" 1
+
+	# testing environment was set up successfully
+	SETUP_ERR=0
+}
+
+check_rt_connectivity()
+{
+	local rtsrc="$1"
+	local rtdst="$2"
+	local prefix
+	local rtsrc_nsname
+
+	rtsrc_nsname="$(get_rtname "${rtsrc}")"
+
+	prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")"
+
+	ip netns exec "${rtsrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
+		"${prefix}::${rtdst}" >/dev/null 2>&1
+}
+
+check_and_log_rt_connectivity()
+{
+	local rtsrc="$1"
+	local rtdst="$2"
+
+	check_rt_connectivity "${rtsrc}" "${rtdst}"
+	log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}"
+}
+
+check_hs_ipv6_connectivity()
+{
+	local hssrc="$1"
+	local hsdst="$2"
+	local hssrc_nsname
+
+	hssrc_nsname="$(get_hsname "${hssrc}")"
+
+	ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
+		"${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1
+}
+
+check_hs_ipv4_connectivity()
+{
+	local hssrc="$1"
+	local hsdst="$2"
+	local hssrc_nsname
+
+	hssrc_nsname="$(get_hsname "${hssrc}")"
+
+	ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
+		"${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1
+}
+
+check_and_log_hs2gw_connectivity()
+{
+	local hssrc="$1"
+
+	check_hs_ipv6_connectivity "${hssrc}" 254
+	log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> gw"
+
+	check_hs_ipv4_connectivity "${hssrc}" 254
+	log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> gw"
+}
+
+check_and_log_hs_ipv6_connectivity()
+{
+	local hssrc="$1"
+	local hsdst="$2"
+
+	check_hs_ipv6_connectivity "${hssrc}" "${hsdst}"
+	log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}"
+}
+
+check_and_log_hs_ipv4_connectivity()
+{
+	local hssrc="$1"
+	local hsdst="$2"
+
+	check_hs_ipv4_connectivity "${hssrc}" "${hsdst}"
+	log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}"
+}
+
+check_and_log_hs_connectivity()
+{
+	local hssrc="$1"
+	local hsdst="$2"
+
+	check_and_log_hs_ipv4_connectivity "${hssrc}" "${hsdst}"
+	check_and_log_hs_ipv6_connectivity "${hssrc}" "${hsdst}"
+}
+
+router_tests()
+{
+	local i
+	local j
+
+	log_section "IPv6 routers connectivity test"
+
+	for i in ${ROUTERS}; do
+		for j in ${ROUTERS}; do
+			if [ "${i}" -eq "${j}" ]; then
+				continue
+			fi
+
+			check_and_log_rt_connectivity "${i}" "${j}"
+		done
+	done
+}
+
+host2gateway_tests()
+{
+	local hs
+
+	log_section "IPv4/IPv6 connectivity test among hosts and gateways"
+
+	for hs in ${HOSTS}; do
+		check_and_log_hs2gw_connectivity "${hs}"
+	done
+}
+
+host_vpn_tests()
+{
+	log_section "SRv6 L2 VPN connectivity test hosts (h1 <-> h2)"
+
+	check_and_log_hs_connectivity 1 2
+	check_and_log_hs_connectivity 2 1
+}
+
+test_dummy_dev_or_ksft_skip()
+{
+	local test_netns
+
+	test_netns="dummy-$(mktemp -u XXXXXXXX)"
+
+	if ! ip netns add "${test_netns}"; then
+		echo "SKIP: Cannot set up netns for testing dummy dev support"
+		exit "${ksft_skip}"
+	fi
+
+	modprobe dummy &>/dev/null || true
+	if ! ip -netns "${test_netns}" link \
+		add "${DUMMY_DEVNAME}" type dummy; then
+		echo "SKIP: dummy dev not supported"
+
+		ip netns del "${test_netns}"
+		exit "${ksft_skip}"
+	fi
+
+	ip netns del "${test_netns}"
+}
+
+test_iproute2_supp_or_ksft_skip()
+{
+	if ! ip route help 2>&1 | grep -qo "l2encap.red"; then
+		echo "SKIP: Missing SRv6 l2encap.red support in iproute2"
+		exit "${ksft_skip}"
+	fi
+}
+
+if [ "$(id -u)" -ne 0 ]; then
+	echo "SKIP: Need root privileges"
+	exit "${ksft_skip}"
+fi
+
+# required programs to carry out this selftest
+test_command_or_ksft_skip ip
+test_command_or_ksft_skip ping
+test_command_or_ksft_skip sysctl
+test_command_or_ksft_skip grep
+
+test_iproute2_supp_or_ksft_skip
+test_dummy_dev_or_ksft_skip
+
+set -e
+trap cleanup EXIT
+
+setup
+set +e
+
+router_tests
+host2gateway_tests
+host_vpn_tests
+
+print_log_test_results
diff --git a/tools/testing/selftests/net/stress_reuseport_listen.c b/tools/testing/selftests/net/stress_reuseport_listen.c
new file mode 100644
index 000000000000..ef800bb35a8e
--- /dev/null
+++ b/tools/testing/selftests/net/stress_reuseport_listen.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+/* Test listening on the same port 443 with multiple VIPS.
+ * Each VIP:443 will have multiple sk listening on by using
+ * SO_REUSEPORT.
+ */
+
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <error.h>
+#include <errno.h>
+#include <time.h>
+#include <arpa/inet.h>
+
+#define IP6_LADDR_START "2401:dead::1"
+#define IP6_LPORT 443
+#define NSEC_PER_SEC 1000000000L
+#define NSEC_PER_USEC 1000L
+
+static unsigned int nr_socks_per_vip;
+static unsigned int nr_vips;
+
+static int *bind_reuseport_sock6(void)
+{
+	int *lfds, *cur_fd, err, optvalue = 1;
+	struct sockaddr_in6 sa6 = {};
+	unsigned int i, j;
+
+	sa6.sin6_family = AF_INET6;
+	sa6.sin6_port = htons(IP6_LPORT);
+	err = inet_pton(AF_INET6, IP6_LADDR_START, &sa6.sin6_addr);
+	if (err != 1)
+		error(1, err, "inet_pton(%s)", IP6_LADDR_START);
+
+	lfds = malloc(nr_vips * nr_socks_per_vip * sizeof(lfds[0]));
+	if (!lfds)
+		error(1, errno, "cannot alloc array of lfds");
+
+	cur_fd = lfds;
+	for (i = 0; i < nr_vips; i++) {
+		for (j = 0; j < nr_socks_per_vip; j++) {
+			*cur_fd = socket(AF_INET6, SOCK_STREAM, 0);
+			if (*cur_fd == -1)
+				error(1, errno,
+				      "lfds[%u,%u] = socket(AF_INET6)", i, j);
+
+			err = setsockopt(*cur_fd, SOL_SOCKET, SO_REUSEPORT,
+					 &optvalue, sizeof(optvalue));
+			if (err)
+				error(1, errno,
+				      "setsockopt(lfds[%u,%u], SO_REUSEPORT)",
+				      i, j);
+
+			err = bind(*cur_fd, (struct sockaddr *)&sa6,
+				   sizeof(sa6));
+			if (err)
+				error(1, errno, "bind(lfds[%u,%u])", i, j);
+			cur_fd++;
+		}
+		sa6.sin6_addr.s6_addr32[3]++;
+	}
+
+	return lfds;
+}
+
+int main(int argc, const char *argv[])
+{
+	struct timespec start_ts, end_ts;
+	unsigned long start_ns, end_ns;
+	unsigned int nr_lsocks;
+	int *lfds, i, err;
+
+	if (argc != 3 || atoi(argv[1]) <= 0 || atoi(argv[2]) <= 0)
+		error(1, 0, "Usage: %s <nr_vips> <nr_socks_per_vip>\n",
+		      argv[0]);
+
+	nr_vips = atoi(argv[1]);
+	nr_socks_per_vip = atoi(argv[2]);
+	nr_lsocks = nr_vips * nr_socks_per_vip;
+	lfds = bind_reuseport_sock6();
+
+	clock_gettime(CLOCK_MONOTONIC, &start_ts);
+	for (i = 0; i < nr_lsocks; i++) {
+		err = listen(lfds[i], 0);
+		if (err)
+			error(1, errno, "listen(lfds[%d])", i);
+	}
+	clock_gettime(CLOCK_MONOTONIC, &end_ts);
+
+	start_ns = start_ts.tv_sec * NSEC_PER_SEC + start_ts.tv_nsec;
+	end_ns = end_ts.tv_sec * NSEC_PER_SEC + end_ts.tv_nsec;
+
+	printf("listen %d socks took %lu.%lu\n", nr_lsocks,
+	       (end_ns - start_ns) / NSEC_PER_SEC,
+	       (end_ns - start_ns) / NSEC_PER_USEC);
+
+	for (i = 0; i < nr_lsocks; i++)
+		close(lfds[i]);
+
+	free(lfds);
+	return 0;
+}
diff --git a/tools/testing/selftests/net/stress_reuseport_listen.sh b/tools/testing/selftests/net/stress_reuseport_listen.sh
new file mode 100755
index 000000000000..94d5d1a1c90f
--- /dev/null
+++ b/tools/testing/selftests/net/stress_reuseport_listen.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
+
+source lib.sh
+NR_FILES=24100
+SAVED_NR_FILES=$(ulimit -n)
+
+setup() {
+	setup_ns NS
+	ip netns exec $NS sysctl -q -w net.ipv6.ip_nonlocal_bind=1
+	ulimit -n $NR_FILES
+}
+
+cleanup() {
+	cleanup_ns $NS
+	ulimit -n $SAVED_NR_FILES
+}
+
+trap cleanup EXIT
+setup
+# 300 different vips listen on port 443
+# Each vip:443 sockaddr has 80 LISTEN sock by using SO_REUSEPORT
+# Total 24000 listening socks
+ip netns exec $NS ./stress_reuseport_listen 300 80
diff --git a/tools/testing/selftests/net/tap.c b/tools/testing/selftests/net/tap.c
new file mode 100644
index 000000000000..247c3b3ac1c9
--- /dev/null
+++ b/tools/testing/selftests/net/tap.c
@@ -0,0 +1,434 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <net/if.h>
+#include <linux/if_tun.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <linux/virtio_net.h>
+#include <netinet/ip.h>
+#include <netinet/udp.h>
+#include "../kselftest_harness.h"
+
+static const char param_dev_tap_name[] = "xmacvtap0";
+static const char param_dev_dummy_name[] = "xdummy0";
+static unsigned char param_hwaddr_src[] = { 0x00, 0xfe, 0x98, 0x14, 0x22, 0x42 };
+static unsigned char param_hwaddr_dest[] = {
+	0x00, 0xfe, 0x98, 0x94, 0xd2, 0x43
+};
+
+#define MAX_RTNL_PAYLOAD (2048)
+#define PKT_DATA 0xCB
+#define TEST_PACKET_SZ (sizeof(struct virtio_net_hdr) + ETH_HLEN + ETH_MAX_MTU)
+
+static struct rtattr *rtattr_add(struct nlmsghdr *nh, unsigned short type,
+				 unsigned short len)
+{
+	struct rtattr *rta =
+		(struct rtattr *)((uint8_t *)nh + RTA_ALIGN(nh->nlmsg_len));
+	rta->rta_type = type;
+	rta->rta_len = RTA_LENGTH(len);
+	nh->nlmsg_len = RTA_ALIGN(nh->nlmsg_len) + RTA_ALIGN(rta->rta_len);
+	return rta;
+}
+
+static struct rtattr *rtattr_begin(struct nlmsghdr *nh, unsigned short type)
+{
+	return rtattr_add(nh, type, 0);
+}
+
+static void rtattr_end(struct nlmsghdr *nh, struct rtattr *attr)
+{
+	uint8_t *end = (uint8_t *)nh + nh->nlmsg_len;
+
+	attr->rta_len = end - (uint8_t *)attr;
+}
+
+static struct rtattr *rtattr_add_str(struct nlmsghdr *nh, unsigned short type,
+				     const char *s)
+{
+	struct rtattr *rta = rtattr_add(nh, type, strlen(s));
+
+	memcpy(RTA_DATA(rta), s, strlen(s));
+	return rta;
+}
+
+static struct rtattr *rtattr_add_strsz(struct nlmsghdr *nh, unsigned short type,
+				       const char *s)
+{
+	struct rtattr *rta = rtattr_add(nh, type, strlen(s) + 1);
+
+	strcpy(RTA_DATA(rta), s);
+	return rta;
+}
+
+static struct rtattr *rtattr_add_any(struct nlmsghdr *nh, unsigned short type,
+				     const void *arr, size_t len)
+{
+	struct rtattr *rta = rtattr_add(nh, type, len);
+
+	memcpy(RTA_DATA(rta), arr, len);
+	return rta;
+}
+
+static int dev_create(const char *dev, const char *link_type,
+		      int (*fill_rtattr)(struct nlmsghdr *nh),
+		      int (*fill_info_data)(struct nlmsghdr *nh))
+{
+	struct {
+		struct nlmsghdr nh;
+		struct ifinfomsg info;
+		unsigned char data[MAX_RTNL_PAYLOAD];
+	} req;
+	struct rtattr *link_info, *info_data;
+	int ret, rtnl;
+
+	rtnl = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE);
+	if (rtnl < 0) {
+		fprintf(stderr, "%s: socket %s\n", __func__, strerror(errno));
+		return 1;
+	}
+
+	memset(&req, 0, sizeof(req));
+	req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info));
+	req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE;
+	req.nh.nlmsg_type = RTM_NEWLINK;
+
+	req.info.ifi_family = AF_UNSPEC;
+	req.info.ifi_type = 1;
+	req.info.ifi_index = 0;
+	req.info.ifi_flags = IFF_BROADCAST | IFF_UP;
+	req.info.ifi_change = 0xffffffff;
+
+	rtattr_add_str(&req.nh, IFLA_IFNAME, dev);
+
+	if (fill_rtattr) {
+		ret = fill_rtattr(&req.nh);
+		if (ret)
+			return ret;
+	}
+
+	link_info = rtattr_begin(&req.nh, IFLA_LINKINFO);
+
+	rtattr_add_strsz(&req.nh, IFLA_INFO_KIND, link_type);
+
+	if (fill_info_data) {
+		info_data = rtattr_begin(&req.nh, IFLA_INFO_DATA);
+		ret = fill_info_data(&req.nh);
+		if (ret)
+			return ret;
+		rtattr_end(&req.nh, info_data);
+	}
+
+	rtattr_end(&req.nh, link_info);
+
+	ret = send(rtnl, &req, req.nh.nlmsg_len, 0);
+	if (ret < 0)
+		fprintf(stderr, "%s: send %s\n", __func__, strerror(errno));
+	ret = (unsigned int)ret != req.nh.nlmsg_len;
+
+	close(rtnl);
+	return ret;
+}
+
+static int dev_delete(const char *dev)
+{
+	struct {
+		struct nlmsghdr nh;
+		struct ifinfomsg info;
+		unsigned char data[MAX_RTNL_PAYLOAD];
+	} req;
+	int ret, rtnl;
+
+	rtnl = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE);
+	if (rtnl < 0) {
+		fprintf(stderr, "%s: socket %s\n", __func__, strerror(errno));
+		return 1;
+	}
+
+	memset(&req, 0, sizeof(req));
+	req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info));
+	req.nh.nlmsg_flags = NLM_F_REQUEST;
+	req.nh.nlmsg_type = RTM_DELLINK;
+
+	req.info.ifi_family = AF_UNSPEC;
+
+	rtattr_add_str(&req.nh, IFLA_IFNAME, dev);
+
+	ret = send(rtnl, &req, req.nh.nlmsg_len, 0);
+	if (ret < 0)
+		fprintf(stderr, "%s: send %s\n", __func__, strerror(errno));
+
+	ret = (unsigned int)ret != req.nh.nlmsg_len;
+
+	close(rtnl);
+	return ret;
+}
+
+static int macvtap_fill_rtattr(struct nlmsghdr *nh)
+{
+	int ifindex;
+
+	ifindex = if_nametoindex(param_dev_dummy_name);
+	if (ifindex == 0) {
+		fprintf(stderr, "%s: ifindex  %s\n", __func__, strerror(errno));
+		return -errno;
+	}
+
+	rtattr_add_any(nh, IFLA_LINK, &ifindex, sizeof(ifindex));
+	rtattr_add_any(nh, IFLA_ADDRESS, param_hwaddr_src, ETH_ALEN);
+
+	return 0;
+}
+
+static int opentap(const char *devname)
+{
+	int ifindex;
+	char buf[256];
+	int fd;
+	struct ifreq ifr;
+
+	ifindex = if_nametoindex(devname);
+	if (ifindex == 0) {
+		fprintf(stderr, "%s: ifindex %s\n", __func__, strerror(errno));
+		return -errno;
+	}
+
+	sprintf(buf, "/dev/tap%d", ifindex);
+	fd = open(buf, O_RDWR | O_NONBLOCK);
+	if (fd < 0) {
+		fprintf(stderr, "%s: open %s\n", __func__, strerror(errno));
+		return -errno;
+	}
+
+	memset(&ifr, 0, sizeof(ifr));
+	strcpy(ifr.ifr_name, devname);
+	ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR | IFF_MULTI_QUEUE;
+	if (ioctl(fd, TUNSETIFF, &ifr, sizeof(ifr)) < 0)
+		return -errno;
+	return fd;
+}
+
+size_t build_eth(uint8_t *buf, uint16_t proto)
+{
+	struct ethhdr *eth = (struct ethhdr *)buf;
+
+	eth->h_proto = htons(proto);
+	memcpy(eth->h_source, param_hwaddr_src, ETH_ALEN);
+	memcpy(eth->h_dest, param_hwaddr_dest, ETH_ALEN);
+
+	return ETH_HLEN;
+}
+
+static uint32_t add_csum(const uint8_t *buf, int len)
+{
+	uint32_t sum = 0;
+	uint16_t *sbuf = (uint16_t *)buf;
+
+	while (len > 1) {
+		sum += *sbuf++;
+		len -= 2;
+	}
+
+	if (len)
+		sum += *(uint8_t *)sbuf;
+
+	return sum;
+}
+
+static uint16_t finish_ip_csum(uint32_t sum)
+{
+	uint16_t lo = sum & 0xffff;
+	uint16_t hi = sum >> 16;
+
+	return ~(lo + hi);
+
+}
+
+static uint16_t build_ip_csum(const uint8_t *buf, int len,
+			      uint32_t sum)
+{
+	sum += add_csum(buf, len);
+	return finish_ip_csum(sum);
+}
+
+static int build_ipv4_header(uint8_t *buf, int payload_len)
+{
+	struct iphdr *iph = (struct iphdr *)buf;
+
+	iph->ihl = 5;
+	iph->version = 4;
+	iph->ttl = 8;
+	iph->tot_len =
+		htons(sizeof(*iph) + sizeof(struct udphdr) + payload_len);
+	iph->id = htons(1337);
+	iph->protocol = IPPROTO_UDP;
+	iph->saddr = htonl((172 << 24) | (17 << 16) | 2);
+	iph->daddr = htonl((172 << 24) | (17 << 16) | 1);
+	iph->check = build_ip_csum(buf, iph->ihl << 2, 0);
+
+	return iph->ihl << 2;
+}
+
+static int build_udp_packet(uint8_t *buf, int payload_len, bool csum_off)
+{
+	const int ip4alen = sizeof(uint32_t);
+	struct udphdr *udph = (struct udphdr *)buf;
+	int len = sizeof(*udph) + payload_len;
+	uint32_t sum = 0;
+
+	udph->source = htons(22);
+	udph->dest = htons(58822);
+	udph->len = htons(len);
+
+	memset(buf + sizeof(struct udphdr), PKT_DATA, payload_len);
+
+	sum = add_csum(buf - 2 * ip4alen, 2 * ip4alen);
+	sum += htons(IPPROTO_UDP) + udph->len;
+
+	if (!csum_off)
+		sum += add_csum(buf, len);
+
+	udph->check = finish_ip_csum(sum);
+
+	return sizeof(*udph) + payload_len;
+}
+
+size_t build_test_packet_valid_udp_gso(uint8_t *buf, size_t payload_len)
+{
+	uint8_t *cur = buf;
+	struct virtio_net_hdr *vh = (struct virtio_net_hdr *)buf;
+
+	vh->hdr_len = ETH_HLEN + sizeof(struct iphdr) + sizeof(struct udphdr);
+	vh->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+	vh->csum_start = ETH_HLEN + sizeof(struct iphdr);
+	vh->csum_offset = __builtin_offsetof(struct udphdr, check);
+	vh->gso_type = VIRTIO_NET_HDR_GSO_UDP;
+	vh->gso_size = ETH_DATA_LEN - sizeof(struct iphdr);
+	cur += sizeof(*vh);
+
+	cur += build_eth(cur, ETH_P_IP);
+	cur += build_ipv4_header(cur, payload_len);
+	cur += build_udp_packet(cur, payload_len, true);
+
+	return cur - buf;
+}
+
+size_t build_test_packet_valid_udp_csum(uint8_t *buf, size_t payload_len)
+{
+	uint8_t *cur = buf;
+	struct virtio_net_hdr *vh = (struct virtio_net_hdr *)buf;
+
+	vh->flags = VIRTIO_NET_HDR_F_DATA_VALID;
+	vh->gso_type = VIRTIO_NET_HDR_GSO_NONE;
+	cur += sizeof(*vh);
+
+	cur += build_eth(cur, ETH_P_IP);
+	cur += build_ipv4_header(cur, payload_len);
+	cur += build_udp_packet(cur, payload_len, false);
+
+	return cur - buf;
+}
+
+size_t build_test_packet_crash_tap_invalid_eth_proto(uint8_t *buf,
+						     size_t payload_len)
+{
+	uint8_t *cur = buf;
+	struct virtio_net_hdr *vh = (struct virtio_net_hdr *)buf;
+
+	vh->hdr_len = ETH_HLEN + sizeof(struct iphdr) + sizeof(struct udphdr);
+	vh->flags = 0;
+	vh->gso_type = VIRTIO_NET_HDR_GSO_UDP;
+	vh->gso_size = ETH_DATA_LEN - sizeof(struct iphdr);
+	cur += sizeof(*vh);
+
+	cur += build_eth(cur, 0);
+	cur += sizeof(struct iphdr) + sizeof(struct udphdr);
+	cur += build_ipv4_header(cur, payload_len);
+	cur += build_udp_packet(cur, payload_len, true);
+	cur += payload_len;
+
+	return cur - buf;
+}
+
+FIXTURE(tap)
+{
+	int fd;
+};
+
+FIXTURE_SETUP(tap)
+{
+	int ret;
+
+	ret = dev_create(param_dev_dummy_name, "dummy", NULL, NULL);
+	EXPECT_EQ(ret, 0);
+
+	ret = dev_create(param_dev_tap_name, "macvtap", macvtap_fill_rtattr,
+			 NULL);
+	EXPECT_EQ(ret, 0);
+
+	self->fd = opentap(param_dev_tap_name);
+	ASSERT_GE(self->fd, 0);
+}
+
+FIXTURE_TEARDOWN(tap)
+{
+	int ret;
+
+	if (self->fd != -1)
+		close(self->fd);
+
+	ret = dev_delete(param_dev_tap_name);
+	EXPECT_EQ(ret, 0);
+
+	ret = dev_delete(param_dev_dummy_name);
+	EXPECT_EQ(ret, 0);
+}
+
+TEST_F(tap, test_packet_valid_udp_gso)
+{
+	uint8_t pkt[TEST_PACKET_SZ];
+	size_t off;
+	int ret;
+
+	memset(pkt, 0, sizeof(pkt));
+	off = build_test_packet_valid_udp_gso(pkt, 1021);
+	ret = write(self->fd, pkt, off);
+	ASSERT_EQ(ret, off);
+}
+
+TEST_F(tap, test_packet_valid_udp_csum)
+{
+	uint8_t pkt[TEST_PACKET_SZ];
+	size_t off;
+	int ret;
+
+	memset(pkt, 0, sizeof(pkt));
+	off = build_test_packet_valid_udp_csum(pkt, 1024);
+	ret = write(self->fd, pkt, off);
+	ASSERT_EQ(ret, off);
+}
+
+TEST_F(tap, test_packet_crash_tap_invalid_eth_proto)
+{
+	uint8_t pkt[TEST_PACKET_SZ];
+	size_t off;
+	int ret;
+
+	memset(pkt, 0, sizeof(pkt));
+	off = build_test_packet_crash_tap_invalid_eth_proto(pkt, 1024);
+	ret = write(self->fd, pkt, off);
+	ASSERT_EQ(ret, -1);
+	ASSERT_EQ(errno, EINVAL);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/tcp_ao/.gitignore b/tools/testing/selftests/net/tcp_ao/.gitignore
new file mode 100644
index 000000000000..e8bb81b715b7
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/.gitignore
@@ -0,0 +1,2 @@
+*_ipv4
+*_ipv6
diff --git a/tools/testing/selftests/net/tcp_ao/Makefile b/tools/testing/selftests/net/tcp_ao/Makefile
new file mode 100644
index 000000000000..522d991e310e
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/Makefile
@@ -0,0 +1,56 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_BOTH_AF := bench-lookups
+TEST_BOTH_AF += connect
+TEST_BOTH_AF += connect-deny
+TEST_BOTH_AF += icmps-accept icmps-discard
+TEST_BOTH_AF += key-management
+TEST_BOTH_AF += restore
+TEST_BOTH_AF += rst
+TEST_BOTH_AF += self-connect
+TEST_BOTH_AF += seq-ext
+TEST_BOTH_AF += setsockopt-closed
+TEST_BOTH_AF += unsigned-md5
+
+TEST_IPV4_PROGS := $(TEST_BOTH_AF:%=%_ipv4)
+TEST_IPV6_PROGS := $(TEST_BOTH_AF:%=%_ipv6)
+
+TEST_GEN_PROGS := $(TEST_IPV4_PROGS) $(TEST_IPV6_PROGS)
+
+top_srcdir	  := ../../../../..
+include ../../lib.mk
+
+HOSTAR ?= ar
+
+LIBDIR	:= $(OUTPUT)/lib
+LIB	:= $(LIBDIR)/libaotst.a
+LDLIBS	+= $(LIB) -pthread
+LIBDEPS	:= lib/aolib.h Makefile
+
+CFLAGS	:= -Wall -O2 -g -D_GNU_SOURCE -fno-strict-aliasing
+CFLAGS	+= $(KHDR_INCLUDES)
+CFLAGS	+= -iquote ./lib/ -I ../../../../include/
+
+# Library
+LIBSRC	:= kconfig.c netlink.c proc.c repair.c setup.c sock.c utils.c
+LIBOBJ	:= $(LIBSRC:%.c=$(LIBDIR)/%.o)
+EXTRA_CLEAN += $(LIBOBJ) $(LIB)
+
+$(LIB): $(LIBOBJ)
+	$(HOSTAR) rcs $@ $^
+
+$(LIBDIR)/%.o: ./lib/%.c $(LIBDEPS)
+	mkdir -p $(LIBDIR)
+	$(CC) $< $(CFLAGS) $(CPPFLAGS) -o $@ -c
+
+$(TEST_GEN_PROGS): $(LIB)
+
+$(OUTPUT)/%_ipv4: %.c
+	$(LINK.c) $^ $(LDLIBS) -o $@
+
+$(OUTPUT)/%_ipv6: %.c
+	$(LINK.c) -DIPV6_TEST $^ $(LDLIBS) -o $@
+
+$(OUTPUT)/icmps-accept_ipv4: CFLAGS+= -DTEST_ICMPS_ACCEPT
+$(OUTPUT)/icmps-accept_ipv6: CFLAGS+= -DTEST_ICMPS_ACCEPT
+$(OUTPUT)/bench-lookups_ipv4: LDLIBS+= -lm
+$(OUTPUT)/bench-lookups_ipv6: LDLIBS+= -lm
diff --git a/tools/testing/selftests/net/tcp_ao/bench-lookups.c b/tools/testing/selftests/net/tcp_ao/bench-lookups.c
new file mode 100644
index 000000000000..a1e6e007c291
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/bench-lookups.c
@@ -0,0 +1,360 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Author: Dmitry Safonov <dima@arista.com> */
+#include <arpa/inet.h>
+#include <inttypes.h>
+#include <math.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <time.h>
+
+#include "../../../../include/linux/bits.h"
+#include "../../../../include/linux/kernel.h"
+#include "aolib.h"
+
+#define BENCH_NR_ITERS	100 /* number of times to run gathering statistics */
+
+static void gen_test_ips(union tcp_addr *ips, size_t ips_nr, bool use_rand)
+{
+	union tcp_addr net = {};
+	size_t i, j;
+
+	if (inet_pton(TEST_FAMILY, TEST_NETWORK, &net) != 1)
+		test_error("Can't convert ip address %s", TEST_NETWORK);
+
+	if (!use_rand) {
+		for (i = 0; i < ips_nr; i++)
+			ips[i] = gen_tcp_addr(net, 2 * i + 1);
+		return;
+	}
+	for (i = 0; i < ips_nr; i++) {
+		size_t r = (size_t)random() | 0x1;
+
+		ips[i] = gen_tcp_addr(net, r);
+
+		for (j = i - 1; j > 0 && i > 0; j--) {
+			if (!memcmp(&ips[i], &ips[j], sizeof(union tcp_addr))) {
+				i--; /* collision */
+				break;
+			}
+		}
+	}
+}
+
+static void test_add_routes(union tcp_addr *ips, size_t ips_nr)
+{
+	size_t i;
+
+	for (i = 0; i < ips_nr; i++) {
+		union tcp_addr *p = (union tcp_addr *)&ips[i];
+		int err;
+
+		err = ip_route_add(veth_name, TEST_FAMILY, this_ip_addr, *p);
+		if (err && err != -EEXIST)
+			test_error("Failed to add route");
+	}
+}
+
+static void server_apply_keys(int lsk, union tcp_addr *ips, size_t ips_nr)
+{
+	size_t i;
+
+	for (i = 0; i < ips_nr; i++) {
+		union tcp_addr *p = (union tcp_addr *)&ips[i];
+
+		if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, *p, -1, 100, 100))
+			test_error("setsockopt(TCP_AO)");
+	}
+}
+
+static const size_t nr_keys[] = { 512, 1024, 2048, 4096, 8192 };
+static union tcp_addr *test_ips;
+
+struct bench_stats {
+	uint64_t min;
+	uint64_t max;
+	uint64_t nr;
+	double mean;
+	double s2;
+};
+
+static struct bench_tests {
+	struct bench_stats delete_last_key;
+	struct bench_stats add_key;
+	struct bench_stats delete_rand_key;
+	struct bench_stats connect_last_key;
+	struct bench_stats connect_rand_key;
+	struct bench_stats delete_async;
+} bench_results[ARRAY_SIZE(nr_keys)];
+
+#define NSEC_PER_SEC 1000000000ULL
+
+static void measure_call(struct bench_stats *st,
+			 void (*f)(int, void *), int sk, void *arg)
+{
+	struct timespec start = {}, end = {};
+	double delta;
+	uint64_t nsec;
+
+	if (clock_gettime(CLOCK_MONOTONIC, &start))
+		test_error("clock_gettime()");
+
+	f(sk, arg);
+
+	if (clock_gettime(CLOCK_MONOTONIC, &end))
+		test_error("clock_gettime()");
+
+	nsec = (end.tv_sec - start.tv_sec) * NSEC_PER_SEC;
+	if (end.tv_nsec >= start.tv_nsec)
+		nsec += end.tv_nsec - start.tv_nsec;
+	else
+		nsec -= start.tv_nsec - end.tv_nsec;
+
+	if (st->nr == 0) {
+		st->min = st->max = nsec;
+	} else {
+		if (st->min > nsec)
+			st->min = nsec;
+		if (st->max < nsec)
+			st->max = nsec;
+	}
+
+	/* Welford-Knuth algorithm */
+	st->nr++;
+	delta = (double)nsec - st->mean;
+	st->mean += delta / st->nr;
+	st->s2 += delta * ((double)nsec - st->mean);
+}
+
+static void delete_mkt(int sk, void *arg)
+{
+	struct tcp_ao_del *ao = arg;
+
+	if (setsockopt(sk, IPPROTO_TCP, TCP_AO_DEL_KEY, ao, sizeof(*ao)))
+		test_error("setsockopt(TCP_AO_DEL_KEY)");
+}
+
+static void add_back_mkt(int sk, void *arg)
+{
+	union tcp_addr *p = arg;
+
+	if (test_add_key(sk, DEFAULT_TEST_PASSWORD, *p, -1, 100, 100))
+		test_error("setsockopt(TCP_AO)");
+}
+
+static void bench_delete(int lsk, struct bench_stats *add,
+			 struct bench_stats *del,
+			 union tcp_addr *ips, size_t ips_nr,
+			 bool rand_order, bool async)
+{
+	struct tcp_ao_del ao_del = {};
+	union tcp_addr *p;
+	size_t i;
+
+	ao_del.sndid = 100;
+	ao_del.rcvid = 100;
+	ao_del.del_async = !!async;
+	ao_del.prefix = DEFAULT_TEST_PREFIX;
+
+	/* Remove the first added */
+	p = (union tcp_addr *)&ips[0];
+	tcp_addr_to_sockaddr_in(&ao_del.addr, p, 0);
+
+	for (i = 0; i < BENCH_NR_ITERS; i++) {
+		measure_call(del, delete_mkt, lsk, (void *)&ao_del);
+
+		/* Restore it back */
+		measure_call(add, add_back_mkt, lsk, (void *)p);
+
+		/*
+		 * Slowest for FILO-linked-list:
+		 * on (i) iteration removing ips[i] element. When it gets
+		 * added to the list back - it becomes first to fetch, so
+		 * on (i + 1) iteration go to ips[i + 1] element.
+		 */
+		if (rand_order)
+			p = (union tcp_addr *)&ips[rand() % ips_nr];
+		else
+			p = (union tcp_addr *)&ips[i % ips_nr];
+		tcp_addr_to_sockaddr_in(&ao_del.addr, p, 0);
+	}
+}
+
+static void bench_connect_srv(int lsk, union tcp_addr *ips, size_t ips_nr)
+{
+	size_t i;
+
+	for (i = 0; i < BENCH_NR_ITERS; i++) {
+		int sk;
+
+		synchronize_threads();
+
+		if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0))
+			test_error("test_wait_fd()");
+
+		sk = accept(lsk, NULL, NULL);
+		if (sk < 0)
+			test_error("accept()");
+
+		close(sk);
+	}
+}
+
+static void test_print_stats(const char *desc, size_t nr, struct bench_stats *bs)
+{
+	test_ok("%-20s\t%zu keys: min=%" PRIu64 "ms max=%" PRIu64 "ms mean=%gms stddev=%g",
+		desc, nr, bs->min / 1000000, bs->max / 1000000,
+		bs->mean / 1000000, sqrt((bs->mean / 1000000) / bs->nr));
+}
+
+static void *server_fn(void *arg)
+{
+	size_t i;
+
+	for (i = 0; i < ARRAY_SIZE(nr_keys); i++) {
+		struct bench_tests *bt = &bench_results[i];
+		int lsk;
+
+		test_ips = malloc(nr_keys[i] * sizeof(union tcp_addr));
+		if (!test_ips)
+			test_error("malloc()");
+
+		lsk = test_listen_socket(this_ip_addr, test_server_port + i, 1);
+
+		gen_test_ips(test_ips, nr_keys[i], false);
+		test_add_routes(test_ips, nr_keys[i]);
+		test_set_optmem(KERNEL_TCP_AO_KEY_SZ_ROUND_UP * nr_keys[i]);
+		server_apply_keys(lsk, test_ips, nr_keys[i]);
+
+		synchronize_threads();
+		bench_connect_srv(lsk, test_ips, nr_keys[i]);
+		bench_connect_srv(lsk, test_ips, nr_keys[i]);
+
+		/* The worst case for FILO-list */
+		bench_delete(lsk, &bt->add_key, &bt->delete_last_key,
+			     test_ips, nr_keys[i], false, false);
+		test_print_stats("Add a new key",
+				nr_keys[i], &bt->add_key);
+		test_print_stats("Delete: worst case",
+				nr_keys[i], &bt->delete_last_key);
+
+		bench_delete(lsk, &bt->add_key, &bt->delete_rand_key,
+			     test_ips, nr_keys[i], true, false);
+		test_print_stats("Delete: random-search",
+				nr_keys[i], &bt->delete_rand_key);
+
+		bench_delete(lsk, &bt->add_key, &bt->delete_async,
+			     test_ips, nr_keys[i], false, true);
+		test_print_stats("Delete: async", nr_keys[i], &bt->delete_async);
+
+		free(test_ips);
+		close(lsk);
+	}
+
+	return NULL;
+}
+
+static void connect_client(int sk, void *arg)
+{
+	size_t *p = arg;
+
+	if (test_connect_socket(sk, this_ip_dest, test_server_port + *p) <= 0)
+		test_error("failed to connect()");
+}
+
+static void client_addr_setup(int sk, union tcp_addr taddr)
+{
+#ifdef IPV6_TEST
+	struct sockaddr_in6 addr = {
+		.sin6_family	= AF_INET6,
+		.sin6_port	= 0,
+		.sin6_addr	= taddr.a6,
+	};
+#else
+	struct sockaddr_in addr = {
+		.sin_family	= AF_INET,
+		.sin_port	= 0,
+		.sin_addr	= taddr.a4,
+	};
+#endif
+	int ret;
+
+	ret = ip_addr_add(veth_name, TEST_FAMILY, taddr, TEST_PREFIX);
+	if (ret && ret != -EEXIST)
+		test_error("Failed to add ip address");
+	ret = ip_route_add(veth_name, TEST_FAMILY, taddr, this_ip_dest);
+	if (ret && ret != -EEXIST)
+		test_error("Failed to add route");
+
+	if (bind(sk, &addr, sizeof(addr)))
+		test_error("bind()");
+}
+
+static void bench_connect_client(size_t port_off, struct bench_tests *bt,
+		union tcp_addr *ips, size_t ips_nr, bool rand_order)
+{
+	struct bench_stats *con;
+	union tcp_addr *p;
+	size_t i;
+
+	if (rand_order)
+		con = &bt->connect_rand_key;
+	else
+		con = &bt->connect_last_key;
+
+	p = (union tcp_addr *)&ips[0];
+
+	for (i = 0; i < BENCH_NR_ITERS; i++) {
+		int sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+
+		if (sk < 0)
+			test_error("socket()");
+
+		client_addr_setup(sk, *p);
+		if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest,
+				 -1, 100, 100))
+			test_error("setsockopt(TCP_AO_ADD_KEY)");
+
+		synchronize_threads();
+
+		measure_call(con, connect_client, sk, (void *)&port_off);
+
+		close(sk);
+
+		/*
+		 * Slowest for FILO-linked-list:
+		 * on (i) iteration removing ips[i] element. When it gets
+		 * added to the list back - it becomes first to fetch, so
+		 * on (i + 1) iteration go to ips[i + 1] element.
+		 */
+		if (rand_order)
+			p = (union tcp_addr *)&ips[rand() % ips_nr];
+		else
+			p = (union tcp_addr *)&ips[i % ips_nr];
+	}
+}
+
+static void *client_fn(void *arg)
+{
+	size_t i;
+
+	for (i = 0; i < ARRAY_SIZE(nr_keys); i++) {
+		struct bench_tests *bt = &bench_results[i];
+
+		synchronize_threads();
+		bench_connect_client(i, bt, test_ips, nr_keys[i], false);
+		test_print_stats("Connect: worst case",
+				nr_keys[i], &bt->connect_last_key);
+
+		bench_connect_client(i, bt, test_ips, nr_keys[i], false);
+		test_print_stats("Connect: random-search",
+				nr_keys[i], &bt->connect_last_key);
+	}
+	synchronize_threads();
+	return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+	test_init(30, server_fn, client_fn);
+	return 0;
+}
diff --git a/tools/testing/selftests/net/tcp_ao/config b/tools/testing/selftests/net/tcp_ao/config
new file mode 100644
index 000000000000..d3277a9de987
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/config
@@ -0,0 +1,10 @@
+CONFIG_CRYPTO_HMAC=y
+CONFIG_CRYPTO_RMD160=y
+CONFIG_CRYPTO_SHA1=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_IPV6=y
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_NET_VRF=y
+CONFIG_TCP_AO=y
+CONFIG_TCP_MD5SIG=y
+CONFIG_VETH=m
diff --git a/tools/testing/selftests/net/tcp_ao/connect-deny.c b/tools/testing/selftests/net/tcp_ao/connect-deny.c
new file mode 100644
index 000000000000..185a2f6e5ff3
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/connect-deny.c
@@ -0,0 +1,264 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Author: Dmitry Safonov <dima@arista.com> */
+#include <inttypes.h>
+#include "aolib.h"
+
+#define fault(type)	(inj == FAULT_ ## type)
+
+static inline int test_add_key_maclen(int sk, const char *key, uint8_t maclen,
+				      union tcp_addr in_addr, uint8_t prefix,
+				      uint8_t sndid, uint8_t rcvid)
+{
+	struct tcp_ao_add tmp = {};
+	int err;
+
+	if (prefix > DEFAULT_TEST_PREFIX)
+		prefix = DEFAULT_TEST_PREFIX;
+
+	err = test_prepare_key(&tmp, DEFAULT_TEST_ALGO, in_addr, false, false,
+			       prefix, 0, sndid, rcvid, maclen,
+			       0, strlen(key), key);
+	if (err)
+		return err;
+
+	err = setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp));
+	if (err < 0)
+		return -errno;
+
+	return test_verify_socket_key(sk, &tmp);
+}
+
+static void try_accept(const char *tst_name, unsigned int port, const char *pwd,
+		       union tcp_addr addr, uint8_t prefix,
+		       uint8_t sndid, uint8_t rcvid, uint8_t maclen,
+		       const char *cnt_name, test_cnt cnt_expected,
+		       fault_t inj)
+{
+	struct tcp_ao_counters ao_cnt1, ao_cnt2;
+	uint64_t before_cnt = 0, after_cnt = 0; /* silence GCC */
+	int lsk, err, sk = 0;
+	time_t timeout;
+
+	lsk = test_listen_socket(this_ip_addr, port, 1);
+
+	if (pwd && test_add_key_maclen(lsk, pwd, maclen, addr, prefix, sndid, rcvid))
+		test_error("setsockopt(TCP_AO_ADD_KEY)");
+
+	if (cnt_name)
+		before_cnt = netstat_get_one(cnt_name, NULL);
+	if (pwd && test_get_tcp_ao_counters(lsk, &ao_cnt1))
+		test_error("test_get_tcp_ao_counters()");
+
+	synchronize_threads(); /* preparations done */
+
+	timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
+	err = test_wait_fd(lsk, timeout, 0);
+	if (err == -ETIMEDOUT) {
+		if (!fault(TIMEOUT))
+			test_fail("timed out for accept()");
+	} else if (err < 0) {
+		test_error("test_wait_fd()");
+	} else {
+		if (fault(TIMEOUT))
+			test_fail("ready to accept");
+
+		sk = accept(lsk, NULL, NULL);
+		if (sk < 0) {
+			test_error("accept()");
+		} else {
+			if (fault(TIMEOUT))
+				test_fail("%s: accepted", tst_name);
+		}
+	}
+
+	if (pwd && test_get_tcp_ao_counters(lsk, &ao_cnt2))
+		test_error("test_get_tcp_ao_counters()");
+
+	close(lsk);
+	if (pwd)
+		test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected);
+
+	if (!cnt_name)
+		goto out;
+
+	after_cnt = netstat_get_one(cnt_name, NULL);
+
+	if (after_cnt <= before_cnt) {
+		test_fail("%s: %s counter did not increase: %zu <= %zu",
+				tst_name, cnt_name, after_cnt, before_cnt);
+	} else {
+		test_ok("%s: counter %s increased %zu => %zu",
+			tst_name, cnt_name, before_cnt, after_cnt);
+	}
+
+out:
+	synchronize_threads(); /* close() */
+	if (sk > 0)
+		close(sk);
+}
+
+static void *server_fn(void *arg)
+{
+	union tcp_addr wrong_addr, network_addr;
+	unsigned int port = test_server_port;
+
+	if (inet_pton(TEST_FAMILY, TEST_WRONG_IP, &wrong_addr) != 1)
+		test_error("Can't convert ip address %s", TEST_WRONG_IP);
+
+	try_accept("Non-AO server + AO client", port++, NULL,
+		   this_ip_dest, -1, 100, 100, 0,
+		   "TCPAOKeyNotFound", 0, FAULT_TIMEOUT);
+
+	try_accept("AO server + Non-AO client", port++, DEFAULT_TEST_PASSWORD,
+		   this_ip_dest, -1, 100, 100, 0,
+		   "TCPAORequired", TEST_CNT_AO_REQUIRED, FAULT_TIMEOUT);
+
+	try_accept("Wrong password", port++, "something that is not DEFAULT_TEST_PASSWORD",
+		   this_ip_dest, -1, 100, 100, 0,
+		   "TCPAOBad", TEST_CNT_BAD, FAULT_TIMEOUT);
+
+	try_accept("Wrong rcv id", port++, DEFAULT_TEST_PASSWORD,
+		   this_ip_dest, -1, 100, 101, 0,
+		   "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND, FAULT_TIMEOUT);
+
+	try_accept("Wrong snd id", port++, DEFAULT_TEST_PASSWORD,
+		   this_ip_dest, -1, 101, 100, 0,
+		   "TCPAOGood", TEST_CNT_GOOD, FAULT_TIMEOUT);
+
+	try_accept("Different maclen", port++, DEFAULT_TEST_PASSWORD,
+		   this_ip_dest, -1, 100, 100, 8,
+		   "TCPAOBad", TEST_CNT_BAD, FAULT_TIMEOUT);
+
+	try_accept("Server: Wrong addr", port++, DEFAULT_TEST_PASSWORD,
+		   wrong_addr, -1, 100, 100, 0,
+		   "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND, FAULT_TIMEOUT);
+
+	try_accept("Client: Wrong addr", port++, NULL,
+		   this_ip_dest, -1, 100, 100, 0, NULL, 0, FAULT_TIMEOUT);
+
+	try_accept("rcv id != snd id", port++, DEFAULT_TEST_PASSWORD,
+		   this_ip_dest, -1, 200, 100, 0,
+		   "TCPAOGood", TEST_CNT_GOOD, 0);
+
+	if (inet_pton(TEST_FAMILY, TEST_NETWORK, &network_addr) != 1)
+		test_error("Can't convert ip address %s", TEST_NETWORK);
+
+	try_accept("Server: prefix match", port++, DEFAULT_TEST_PASSWORD,
+		   network_addr, 16, 100, 100, 0,
+		   "TCPAOGood", TEST_CNT_GOOD, 0);
+
+	try_accept("Client: prefix match", port++, DEFAULT_TEST_PASSWORD,
+		   this_ip_dest, -1, 100, 100, 0,
+		   "TCPAOGood", TEST_CNT_GOOD, 0);
+
+	/* client exits */
+	synchronize_threads();
+	return NULL;
+}
+
+static void try_connect(const char *tst_name, unsigned int port,
+			const char *pwd, union tcp_addr addr, uint8_t prefix,
+			uint8_t sndid, uint8_t rcvid,
+			test_cnt cnt_expected, fault_t inj)
+{
+	struct tcp_ao_counters ao_cnt1, ao_cnt2;
+	time_t timeout;
+	int sk, ret;
+
+	sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+	if (sk < 0)
+		test_error("socket()");
+
+	if (pwd && test_add_key(sk, pwd, addr, prefix, sndid, rcvid))
+		test_error("setsockopt(TCP_AO_ADD_KEY)");
+
+	if (pwd && test_get_tcp_ao_counters(sk, &ao_cnt1))
+		test_error("test_get_tcp_ao_counters()");
+
+	synchronize_threads(); /* preparations done */
+
+	timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
+	ret = _test_connect_socket(sk, this_ip_dest, port, timeout);
+
+	if (ret < 0) {
+		if (fault(KEYREJECT) && ret == -EKEYREJECTED) {
+			test_ok("%s: connect() was prevented", tst_name);
+		} else if (ret == -ETIMEDOUT && fault(TIMEOUT)) {
+			test_ok("%s", tst_name);
+		} else if (ret == -ECONNREFUSED &&
+				(fault(TIMEOUT) || fault(KEYREJECT))) {
+			test_ok("%s: refused to connect", tst_name);
+		} else {
+			test_error("%s: connect() returned %d", tst_name, ret);
+		}
+		goto out;
+	}
+
+	if (fault(TIMEOUT) || fault(KEYREJECT))
+		test_fail("%s: connected", tst_name);
+	else
+		test_ok("%s: connected", tst_name);
+	if (pwd && ret > 0) {
+		if (test_get_tcp_ao_counters(sk, &ao_cnt2))
+			test_error("test_get_tcp_ao_counters()");
+		test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected);
+	}
+out:
+	synchronize_threads(); /* close() */
+
+	if (ret > 0)
+		close(sk);
+}
+
+static void *client_fn(void *arg)
+{
+	union tcp_addr wrong_addr, network_addr;
+	unsigned int port = test_server_port;
+
+	if (inet_pton(TEST_FAMILY, TEST_WRONG_IP, &wrong_addr) != 1)
+		test_error("Can't convert ip address %s", TEST_WRONG_IP);
+
+	try_connect("Non-AO server + AO client", port++, DEFAULT_TEST_PASSWORD,
+			this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
+
+	try_connect("AO server + Non-AO client", port++, NULL,
+			this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
+
+	try_connect("Wrong password", port++, DEFAULT_TEST_PASSWORD,
+			this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
+
+	try_connect("Wrong rcv id", port++, DEFAULT_TEST_PASSWORD,
+			this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
+
+	try_connect("Wrong snd id", port++, DEFAULT_TEST_PASSWORD,
+			this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
+
+	try_connect("Different maclen", port++, DEFAULT_TEST_PASSWORD,
+			this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
+
+	try_connect("Server: Wrong addr", port++, DEFAULT_TEST_PASSWORD,
+			this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
+
+	try_connect("Client: Wrong addr", port++, DEFAULT_TEST_PASSWORD,
+			wrong_addr, -1, 100, 100, 0, FAULT_KEYREJECT);
+
+	try_connect("rcv id != snd id", port++, DEFAULT_TEST_PASSWORD,
+			this_ip_dest, -1, 100, 200, TEST_CNT_GOOD, 0);
+
+	if (inet_pton(TEST_FAMILY, TEST_NETWORK, &network_addr) != 1)
+		test_error("Can't convert ip address %s", TEST_NETWORK);
+
+	try_connect("Server: prefix match", port++, DEFAULT_TEST_PASSWORD,
+			this_ip_dest, -1, 100, 100, TEST_CNT_GOOD, 0);
+
+	try_connect("Client: prefix match", port++, DEFAULT_TEST_PASSWORD,
+			network_addr, 16, 100, 100, TEST_CNT_GOOD, 0);
+
+	return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+	test_init(21, server_fn, client_fn);
+	return 0;
+}
diff --git a/tools/testing/selftests/net/tcp_ao/connect.c b/tools/testing/selftests/net/tcp_ao/connect.c
new file mode 100644
index 000000000000..81653b47f303
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/connect.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Author: Dmitry Safonov <dima@arista.com> */
+#include <inttypes.h>
+#include "aolib.h"
+
+static void *server_fn(void *arg)
+{
+	int sk, lsk;
+	ssize_t bytes;
+
+	lsk = test_listen_socket(this_ip_addr, test_server_port, 1);
+
+	if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100))
+		test_error("setsockopt(TCP_AO_ADD_KEY)");
+	synchronize_threads();
+
+	if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0))
+		test_error("test_wait_fd()");
+
+	sk = accept(lsk, NULL, NULL);
+	if (sk < 0)
+		test_error("accept()");
+
+	synchronize_threads();
+
+	bytes = test_server_run(sk, 0, 0);
+
+	test_fail("server served: %zd", bytes);
+	return NULL;
+}
+
+static void *client_fn(void *arg)
+{
+	int sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+	uint64_t before_aogood, after_aogood;
+	const size_t nr_packets = 20;
+	struct netstat *ns_before, *ns_after;
+	struct tcp_ao_counters ao1, ao2;
+
+	if (sk < 0)
+		test_error("socket()");
+
+	if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100))
+		test_error("setsockopt(TCP_AO_ADD_KEY)");
+
+	synchronize_threads();
+	if (test_connect_socket(sk, this_ip_dest, test_server_port) <= 0)
+		test_error("failed to connect()");
+	synchronize_threads();
+
+	ns_before = netstat_read();
+	before_aogood = netstat_get(ns_before, "TCPAOGood", NULL);
+	if (test_get_tcp_ao_counters(sk, &ao1))
+		test_error("test_get_tcp_ao_counters()");
+
+	if (test_client_verify(sk, 100, nr_packets, TEST_TIMEOUT_SEC)) {
+		test_fail("verify failed");
+		return NULL;
+	}
+
+	ns_after = netstat_read();
+	after_aogood = netstat_get(ns_after, "TCPAOGood", NULL);
+	if (test_get_tcp_ao_counters(sk, &ao2))
+		test_error("test_get_tcp_ao_counters()");
+	netstat_print_diff(ns_before, ns_after);
+	netstat_free(ns_before);
+	netstat_free(ns_after);
+
+	if (nr_packets > (after_aogood - before_aogood)) {
+		test_fail("TCPAOGood counter mismatch: %zu > (%zu - %zu)",
+				nr_packets, after_aogood, before_aogood);
+		return NULL;
+	}
+	if (test_tcp_ao_counters_cmp("connect", &ao1, &ao2, TEST_CNT_GOOD))
+		return NULL;
+
+	test_ok("connect TCPAOGood %" PRIu64 "/%" PRIu64 "/%" PRIu64 " => %" PRIu64 "/%" PRIu64 "/%" PRIu64 ", sent %" PRIu64,
+			before_aogood, ao1.ao_info_pkt_good,
+			ao1.key_cnts[0].pkt_good,
+			after_aogood, ao2.ao_info_pkt_good,
+			ao2.key_cnts[0].pkt_good,
+			nr_packets);
+	return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+	test_init(1, server_fn, client_fn);
+	return 0;
+}
diff --git a/tools/testing/selftests/net/tcp_ao/icmps-accept.c b/tools/testing/selftests/net/tcp_ao/icmps-accept.c
new file mode 120000
index 000000000000..0a5bb85eb260
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/icmps-accept.c
@@ -0,0 +1 @@
+icmps-discard.c
+\ No newline at end of file
diff --git a/tools/testing/selftests/net/tcp_ao/icmps-discard.c b/tools/testing/selftests/net/tcp_ao/icmps-discard.c
new file mode 100644
index 000000000000..d69bcba3c929
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/icmps-discard.c
@@ -0,0 +1,449 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Selftest that verifies that incomping ICMPs are ignored,
+ * the TCP connection stays alive, no hard or soft errors get reported
+ * to the usespace and the counter for ignored ICMPs is updated.
+ *
+ * RFC5925, 7.8:
+ * >> A TCP-AO implementation MUST default to ignore incoming ICMPv4
+ * messages of Type 3 (destination unreachable), Codes 2-4 (protocol
+ * unreachable, port unreachable, and fragmentation needed -- ’hard
+ * errors’), and ICMPv6 Type 1 (destination unreachable), Code 1
+ * (administratively prohibited) and Code 4 (port unreachable) intended
+ * for connections in synchronized states (ESTABLISHED, FIN-WAIT-1, FIN-
+ * WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK, TIME-WAIT) that match MKTs.
+ *
+ * Author: Dmitry Safonov <dima@arista.com>
+ */
+#include <inttypes.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/ipv6.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <sys/socket.h>
+#include "aolib.h"
+#include "../../../../include/linux/compiler.h"
+
+const size_t packets_nr = 20;
+const size_t packet_size = 100;
+const char *tcpao_icmps	= "TCPAODroppedIcmps";
+
+#ifdef IPV6_TEST
+const char *dst_unreach	= "Icmp6InDestUnreachs";
+const int sk_ip_level	= SOL_IPV6;
+const int sk_recverr	= IPV6_RECVERR;
+#else
+const char *dst_unreach	= "InDestUnreachs";
+const int sk_ip_level	= SOL_IP;
+const int sk_recverr	= IP_RECVERR;
+#endif
+
+/* Server is expected to fail with hard error if ::accept_icmp is set */
+#ifdef TEST_ICMPS_ACCEPT
+# define test_icmps_fail test_ok
+# define test_icmps_ok test_fail
+#else
+# define test_icmps_fail test_fail
+# define test_icmps_ok test_ok
+#endif
+
+static void serve_interfered(int sk)
+{
+	ssize_t test_quota = packet_size * packets_nr * 10;
+	uint64_t dest_unreach_a, dest_unreach_b;
+	uint64_t icmp_ignored_a, icmp_ignored_b;
+	struct tcp_ao_counters ao_cnt1, ao_cnt2;
+	bool counter_not_found;
+	struct netstat *ns_after, *ns_before;
+	ssize_t bytes;
+
+	ns_before = netstat_read();
+	dest_unreach_a = netstat_get(ns_before, dst_unreach, NULL);
+	icmp_ignored_a = netstat_get(ns_before, tcpao_icmps, NULL);
+	if (test_get_tcp_ao_counters(sk, &ao_cnt1))
+		test_error("test_get_tcp_ao_counters()");
+	bytes = test_server_run(sk, test_quota, 0);
+	ns_after = netstat_read();
+	netstat_print_diff(ns_before, ns_after);
+	dest_unreach_b = netstat_get(ns_after, dst_unreach, NULL);
+	icmp_ignored_b = netstat_get(ns_after, tcpao_icmps,
+					&counter_not_found);
+	if (test_get_tcp_ao_counters(sk, &ao_cnt2))
+		test_error("test_get_tcp_ao_counters()");
+
+	netstat_free(ns_before);
+	netstat_free(ns_after);
+
+	if (dest_unreach_a >= dest_unreach_b) {
+		test_fail("%s counter didn't change: %" PRIu64 " >= %" PRIu64,
+				dst_unreach, dest_unreach_a, dest_unreach_b);
+		return;
+	}
+	test_ok("%s delivered %" PRIu64,
+		dst_unreach, dest_unreach_b - dest_unreach_a);
+	if (bytes < 0)
+		test_icmps_fail("Server failed with %zd: %s", bytes, strerrordesc_np(-bytes));
+	else
+		test_icmps_ok("Server survived %zd bytes of traffic", test_quota);
+	if (counter_not_found) {
+		test_fail("Not found %s counter", tcpao_icmps);
+		return;
+	}
+#ifdef TEST_ICMPS_ACCEPT
+	test_tcp_ao_counters_cmp(NULL, &ao_cnt1, &ao_cnt2, TEST_CNT_GOOD);
+#else
+	test_tcp_ao_counters_cmp(NULL, &ao_cnt1, &ao_cnt2, TEST_CNT_GOOD | TEST_CNT_AO_DROPPED_ICMP);
+#endif
+	if (icmp_ignored_a >= icmp_ignored_b) {
+		test_icmps_fail("%s counter didn't change: %" PRIu64 " >= %" PRIu64,
+				tcpao_icmps, icmp_ignored_a, icmp_ignored_b);
+		return;
+	}
+	test_icmps_ok("ICMPs ignored %" PRIu64, icmp_ignored_b - icmp_ignored_a);
+}
+
+static void *server_fn(void *arg)
+{
+	int val, sk, lsk;
+	bool accept_icmps = false;
+
+	lsk = test_listen_socket(this_ip_addr, test_server_port, 1);
+
+#ifdef TEST_ICMPS_ACCEPT
+	accept_icmps = true;
+#endif
+
+	if (test_set_ao_flags(lsk, false, accept_icmps))
+		test_error("setsockopt(TCP_AO_INFO)");
+
+	if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100))
+		test_error("setsockopt(TCP_AO_ADD_KEY)");
+	synchronize_threads();
+
+	if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0))
+		test_error("test_wait_fd()");
+
+	sk = accept(lsk, NULL, NULL);
+	if (sk < 0)
+		test_error("accept()");
+
+	/* Fail on hard ip errors, such as dest unreachable (RFC1122) */
+	val = 1;
+	if (setsockopt(sk, sk_ip_level, sk_recverr, &val, sizeof(val)))
+		test_error("setsockopt()");
+
+	synchronize_threads();
+
+	serve_interfered(sk);
+	return NULL;
+}
+
+static size_t packets_sent;
+static size_t icmps_sent;
+
+static uint32_t checksum4_nofold(void *data, size_t len, uint32_t sum)
+{
+	uint16_t *words = data;
+	size_t i;
+
+	for (i = 0; i < len / sizeof(uint16_t); i++)
+		sum += words[i];
+	if (len & 1)
+		sum += ((char *)data)[len - 1];
+	return sum;
+}
+
+static uint16_t checksum4_fold(void *data, size_t len, uint32_t sum)
+{
+	sum = checksum4_nofold(data, len, sum);
+	while (sum > 0xFFFF)
+		sum = (sum & 0xFFFF) + (sum >> 16);
+	return ~sum;
+}
+
+static void set_ip4hdr(struct iphdr *iph, size_t packet_len, int proto,
+		struct sockaddr_in *src, struct sockaddr_in *dst)
+{
+	iph->version	= 4;
+	iph->ihl	= 5;
+	iph->tos	= 0;
+	iph->tot_len	= htons(packet_len);
+	iph->ttl	= 2;
+	iph->protocol	= proto;
+	iph->saddr	= src->sin_addr.s_addr;
+	iph->daddr	= dst->sin_addr.s_addr;
+	iph->check	= checksum4_fold((void *)iph, iph->ihl << 1, 0);
+}
+
+static void icmp_interfere4(uint8_t type, uint8_t code, uint32_t rcv_nxt,
+		struct sockaddr_in *src, struct sockaddr_in *dst)
+{
+	int sk = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
+	struct {
+		struct iphdr iph;
+		struct icmphdr icmph;
+		struct iphdr iphe;
+		struct {
+			uint16_t sport;
+			uint16_t dport;
+			uint32_t seq;
+		} tcph;
+	} packet = {};
+	size_t packet_len;
+	ssize_t bytes;
+
+	if (sk < 0)
+		test_error("socket(AF_INET, SOCK_RAW, IPPROTO_RAW)");
+
+	packet_len = sizeof(packet);
+	set_ip4hdr(&packet.iph, packet_len, IPPROTO_ICMP, src, dst);
+
+	packet.icmph.type = type;
+	packet.icmph.code = code;
+	if (code == ICMP_FRAG_NEEDED) {
+		randomize_buffer(&packet.icmph.un.frag.mtu,
+				sizeof(packet.icmph.un.frag.mtu));
+	}
+
+	packet_len = sizeof(packet.iphe) + sizeof(packet.tcph);
+	set_ip4hdr(&packet.iphe, packet_len, IPPROTO_TCP, dst, src);
+
+	packet.tcph.sport = dst->sin_port;
+	packet.tcph.dport = src->sin_port;
+	packet.tcph.seq = htonl(rcv_nxt);
+
+	packet_len = sizeof(packet) - sizeof(packet.iph);
+	packet.icmph.checksum = checksum4_fold((void *)&packet.icmph,
+						packet_len, 0);
+
+	bytes = sendto(sk, &packet, sizeof(packet), 0,
+		       (struct sockaddr *)dst, sizeof(*dst));
+	if (bytes != sizeof(packet))
+		test_error("send(): %zd", bytes);
+	icmps_sent++;
+
+	close(sk);
+}
+
+static void set_ip6hdr(struct ipv6hdr *iph, size_t packet_len, int proto,
+		struct sockaddr_in6 *src, struct sockaddr_in6 *dst)
+{
+	iph->version		= 6;
+	iph->payload_len	= htons(packet_len);
+	iph->nexthdr		= proto;
+	iph->hop_limit		= 2;
+	iph->saddr		= src->sin6_addr;
+	iph->daddr		= dst->sin6_addr;
+}
+
+static inline uint16_t csum_fold(uint32_t csum)
+{
+	uint32_t sum = csum;
+
+	sum = (sum & 0xffff) + (sum >> 16);
+	sum = (sum & 0xffff) + (sum >> 16);
+	return (uint16_t)~sum;
+}
+
+static inline uint32_t csum_add(uint32_t csum, uint32_t addend)
+{
+	uint32_t res = csum;
+
+	res += addend;
+	return res + (res < addend);
+}
+
+noinline uint32_t checksum6_nofold(void *data, size_t len, uint32_t sum)
+{
+	uint16_t *words = data;
+	size_t i;
+
+	for (i = 0; i < len / sizeof(uint16_t); i++)
+		sum = csum_add(sum, words[i]);
+	if (len & 1)
+		sum = csum_add(sum, ((char *)data)[len - 1]);
+	return sum;
+}
+
+noinline uint16_t icmp6_checksum(struct sockaddr_in6 *src,
+				 struct sockaddr_in6 *dst,
+				 void *ptr, size_t len, uint8_t proto)
+{
+	struct {
+		struct in6_addr saddr;
+		struct in6_addr daddr;
+		uint32_t payload_len;
+		uint8_t zero[3];
+		uint8_t nexthdr;
+	} pseudo_header = {};
+	uint32_t sum;
+
+	pseudo_header.saddr		= src->sin6_addr;
+	pseudo_header.daddr		= dst->sin6_addr;
+	pseudo_header.payload_len	= htonl(len);
+	pseudo_header.nexthdr		= proto;
+
+	sum = checksum6_nofold(&pseudo_header, sizeof(pseudo_header), 0);
+	sum = checksum6_nofold(ptr, len, sum);
+
+	return csum_fold(sum);
+}
+
+static void icmp6_interfere(int type, int code, uint32_t rcv_nxt,
+		struct sockaddr_in6 *src, struct sockaddr_in6 *dst)
+{
+	int sk = socket(AF_INET6, SOCK_RAW, IPPROTO_RAW);
+	struct sockaddr_in6 dst_raw = *dst;
+	struct {
+		struct ipv6hdr iph;
+		struct icmp6hdr icmph;
+		struct ipv6hdr iphe;
+		struct {
+			uint16_t sport;
+			uint16_t dport;
+			uint32_t seq;
+		} tcph;
+	} packet = {};
+	size_t packet_len;
+	ssize_t bytes;
+
+
+	if (sk < 0)
+		test_error("socket(AF_INET6, SOCK_RAW, IPPROTO_RAW)");
+
+	packet_len = sizeof(packet) - sizeof(packet.iph);
+	set_ip6hdr(&packet.iph, packet_len, IPPROTO_ICMPV6, src, dst);
+
+	packet.icmph.icmp6_type = type;
+	packet.icmph.icmp6_code = code;
+
+	packet_len = sizeof(packet.iphe) + sizeof(packet.tcph);
+	set_ip6hdr(&packet.iphe, packet_len, IPPROTO_TCP, dst, src);
+
+	packet.tcph.sport = dst->sin6_port;
+	packet.tcph.dport = src->sin6_port;
+	packet.tcph.seq = htonl(rcv_nxt);
+
+	packet_len = sizeof(packet) - sizeof(packet.iph);
+
+	packet.icmph.icmp6_cksum = icmp6_checksum(src, dst,
+			(void *)&packet.icmph, packet_len, IPPROTO_ICMPV6);
+
+	dst_raw.sin6_port = htons(IPPROTO_RAW);
+	bytes = sendto(sk, &packet, sizeof(packet), 0,
+		       (struct sockaddr *)&dst_raw, sizeof(dst_raw));
+	if (bytes != sizeof(packet))
+		test_error("send(): %zd", bytes);
+	icmps_sent++;
+
+	close(sk);
+}
+
+static uint32_t get_rcv_nxt(int sk)
+{
+	int val = TCP_REPAIR_ON;
+	uint32_t ret;
+	socklen_t sz = sizeof(ret);
+
+	if (setsockopt(sk, SOL_TCP, TCP_REPAIR, &val, sizeof(val)))
+		test_error("setsockopt(TCP_REPAIR)");
+	val = TCP_RECV_QUEUE;
+	if (setsockopt(sk, SOL_TCP, TCP_REPAIR_QUEUE, &val, sizeof(val)))
+		test_error("setsockopt(TCP_REPAIR_QUEUE)");
+	if (getsockopt(sk, SOL_TCP, TCP_QUEUE_SEQ, &ret, &sz))
+		test_error("getsockopt(TCP_QUEUE_SEQ)");
+	val = TCP_REPAIR_OFF_NO_WP;
+	if (setsockopt(sk, SOL_TCP, TCP_REPAIR, &val, sizeof(val)))
+		test_error("setsockopt(TCP_REPAIR)");
+	return ret;
+}
+
+static void icmp_interfere(const size_t nr, uint32_t rcv_nxt, void *src, void *dst)
+{
+	struct sockaddr_in *saddr4 = src;
+	struct sockaddr_in *daddr4 = dst;
+	struct sockaddr_in6 *saddr6 = src;
+	struct sockaddr_in6 *daddr6 = dst;
+	size_t i;
+
+	if (saddr4->sin_family != daddr4->sin_family)
+		test_error("Different address families");
+
+	for (i = 0; i < nr; i++) {
+		if (saddr4->sin_family == AF_INET) {
+			icmp_interfere4(ICMP_DEST_UNREACH, ICMP_PROT_UNREACH,
+					rcv_nxt, saddr4, daddr4);
+			icmp_interfere4(ICMP_DEST_UNREACH, ICMP_PORT_UNREACH,
+					rcv_nxt, saddr4, daddr4);
+			icmp_interfere4(ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+					rcv_nxt, saddr4, daddr4);
+			icmps_sent += 3;
+		} else if (saddr4->sin_family == AF_INET6) {
+			icmp6_interfere(ICMPV6_DEST_UNREACH,
+					ICMPV6_ADM_PROHIBITED,
+					rcv_nxt, saddr6, daddr6);
+			icmp6_interfere(ICMPV6_DEST_UNREACH,
+					ICMPV6_PORT_UNREACH,
+					rcv_nxt, saddr6, daddr6);
+			icmps_sent += 2;
+		} else {
+			test_error("Not ip address family");
+		}
+	}
+}
+
+static void send_interfered(int sk)
+{
+	const unsigned int timeout = TEST_TIMEOUT_SEC;
+	struct sockaddr_in6 src, dst;
+	socklen_t addr_sz;
+
+	addr_sz = sizeof(src);
+	if (getsockname(sk, &src, &addr_sz))
+		test_error("getsockname()");
+	addr_sz = sizeof(dst);
+	if (getpeername(sk, &dst, &addr_sz))
+		test_error("getpeername()");
+
+	while (1) {
+		uint32_t rcv_nxt;
+
+		if (test_client_verify(sk, packet_size, packets_nr, timeout)) {
+			test_fail("client: connection is broken");
+			return;
+		}
+		packets_sent += packets_nr;
+		rcv_nxt = get_rcv_nxt(sk);
+		icmp_interfere(packets_nr, rcv_nxt, (void *)&src, (void *)&dst);
+	}
+}
+
+static void *client_fn(void *arg)
+{
+	int sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+
+	if (sk < 0)
+		test_error("socket()");
+
+	if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100))
+		test_error("setsockopt(TCP_AO_ADD_KEY)");
+
+	synchronize_threads();
+	if (test_connect_socket(sk, this_ip_dest, test_server_port) <= 0)
+		test_error("failed to connect()");
+	synchronize_threads();
+
+	send_interfered(sk);
+
+	/* Not expecting client to quit */
+	test_fail("client disconnected");
+
+	return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+	test_init(3, server_fn, client_fn);
+	return 0;
+}
diff --git a/tools/testing/selftests/net/tcp_ao/key-management.c b/tools/testing/selftests/net/tcp_ao/key-management.c
new file mode 100644
index 000000000000..24e62120b792
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/key-management.c
@@ -0,0 +1,1186 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Author: Dmitry Safonov <dima@arista.com> */
+#include <inttypes.h>
+#include "../../../../include/linux/kernel.h"
+#include "aolib.h"
+
+const size_t nr_packets = 20;
+const size_t msg_len = 100;
+const size_t quota = nr_packets * msg_len;
+union tcp_addr wrong_addr;
+#define SECOND_PASSWORD	"at all times sincere friends of freedom have been rare"
+#define fault(type)	(inj == FAULT_ ## type)
+
+static const int test_vrf_ifindex = 200;
+static const uint8_t test_vrf_tabid = 42;
+static void setup_vrfs(void)
+{
+	int err;
+
+	if (!kernel_config_has(KCONFIG_NET_VRF))
+		return;
+
+	err = add_vrf("ksft-vrf", test_vrf_tabid, test_vrf_ifindex, -1);
+	if (err)
+		test_error("Failed to add a VRF: %d", err);
+
+	err = link_set_up("ksft-vrf");
+	if (err)
+		test_error("Failed to bring up a VRF");
+
+	err = ip_route_add_vrf(veth_name, TEST_FAMILY,
+			       this_ip_addr, this_ip_dest, test_vrf_tabid);
+	if (err)
+		test_error("Failed to add a route to VRF");
+}
+
+
+static int prepare_sk(union tcp_addr *addr, uint8_t sndid, uint8_t rcvid)
+{
+	int sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+
+	if (sk < 0)
+		test_error("socket()");
+
+	if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest,
+			 DEFAULT_TEST_PREFIX, 100, 100))
+		test_error("test_add_key()");
+
+	if (addr && test_add_key(sk, SECOND_PASSWORD, *addr,
+				 DEFAULT_TEST_PREFIX, sndid, rcvid))
+		test_error("test_add_key()");
+
+	return sk;
+}
+
+static int prepare_lsk(union tcp_addr *addr, uint8_t sndid, uint8_t rcvid)
+{
+	int sk = prepare_sk(addr, sndid, rcvid);
+
+	if (listen(sk, 10))
+		test_error("listen()");
+
+	return sk;
+}
+
+static int test_del_key(int sk, uint8_t sndid, uint8_t rcvid, bool async,
+			int current_key, int rnext_key)
+{
+	struct tcp_ao_info_opt ao_info = {};
+	struct tcp_ao_getsockopt key = {};
+	struct tcp_ao_del del = {};
+	sockaddr_af sockaddr;
+	int err;
+
+	tcp_addr_to_sockaddr_in(&del.addr, &this_ip_dest, 0);
+	del.prefix = DEFAULT_TEST_PREFIX;
+	del.sndid = sndid;
+	del.rcvid = rcvid;
+
+	if (current_key >= 0) {
+		del.set_current = 1;
+		del.current_key = (uint8_t)current_key;
+	}
+	if (rnext_key >= 0) {
+		del.set_rnext = 1;
+		del.rnext = (uint8_t)rnext_key;
+	}
+
+	err = setsockopt(sk, IPPROTO_TCP, TCP_AO_DEL_KEY, &del, sizeof(del));
+	if (err < 0)
+		return -errno;
+
+	if (async)
+		return 0;
+
+	tcp_addr_to_sockaddr_in(&sockaddr, &this_ip_dest, 0);
+	err = test_get_one_ao(sk, &key, &sockaddr, sizeof(sockaddr),
+			      DEFAULT_TEST_PREFIX, sndid, rcvid);
+	if (!err)
+		return -EEXIST;
+	if (err != -E2BIG)
+		test_error("getsockopt()");
+	if (current_key < 0 && rnext_key < 0)
+		return 0;
+	if (test_get_ao_info(sk, &ao_info))
+		test_error("getsockopt(TCP_AO_INFO) failed");
+	if (current_key >= 0 && ao_info.current_key != (uint8_t)current_key)
+		return -ENOTRECOVERABLE;
+	if (rnext_key >= 0 && ao_info.rnext != (uint8_t)rnext_key)
+		return -ENOTRECOVERABLE;
+	return 0;
+}
+
+static void try_delete_key(char *tst_name, int sk, uint8_t sndid, uint8_t rcvid,
+			   bool async, int current_key, int rnext_key,
+			   fault_t inj)
+{
+	int err;
+
+	err = test_del_key(sk, sndid, rcvid, async, current_key, rnext_key);
+	if ((err == -EBUSY && fault(BUSY)) || (err == -EINVAL && fault(CURRNEXT))) {
+		test_ok("%s: key deletion was prevented", tst_name);
+		return;
+	}
+	if (err && fault(FIXME)) {
+		test_xfail("%s: failed to delete the key %u:%u %d",
+			   tst_name, sndid, rcvid, err);
+		return;
+	}
+	if (!err) {
+		if (fault(BUSY) || fault(CURRNEXT)) {
+			test_fail("%s: the key was deleted %u:%u %d", tst_name,
+				  sndid, rcvid, err);
+		} else {
+			test_ok("%s: the key was deleted", tst_name);
+		}
+		return;
+	}
+	test_fail("%s: can't delete the key %u:%u %d", tst_name, sndid, rcvid, err);
+}
+
+static int test_set_key(int sk, int current_keyid, int rnext_keyid)
+{
+	struct tcp_ao_info_opt ao_info = {};
+	int err;
+
+	if (current_keyid >= 0) {
+		ao_info.set_current = 1;
+		ao_info.current_key = (uint8_t)current_keyid;
+	}
+	if (rnext_keyid >= 0) {
+		ao_info.set_rnext = 1;
+		ao_info.rnext = (uint8_t)rnext_keyid;
+	}
+
+	err = test_set_ao_info(sk, &ao_info);
+	if (err)
+		return err;
+	if (test_get_ao_info(sk, &ao_info))
+		test_error("getsockopt(TCP_AO_INFO) failed");
+	if (current_keyid >= 0 && ao_info.current_key != (uint8_t)current_keyid)
+		return -ENOTRECOVERABLE;
+	if (rnext_keyid >= 0 && ao_info.rnext != (uint8_t)rnext_keyid)
+		return -ENOTRECOVERABLE;
+	return 0;
+}
+
+static int test_add_current_rnext_key(int sk, const char *key, uint8_t keyflags,
+				      union tcp_addr in_addr, uint8_t prefix,
+				      bool set_current, bool set_rnext,
+				      uint8_t sndid, uint8_t rcvid)
+{
+	struct tcp_ao_add tmp = {};
+	int err;
+
+	err = test_prepare_key(&tmp, DEFAULT_TEST_ALGO, in_addr,
+			       set_current, set_rnext,
+			       prefix, 0, sndid, rcvid, 0, keyflags,
+			       strlen(key), key);
+	if (err)
+		return err;
+
+
+	err = setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp));
+	if (err < 0)
+		return -errno;
+
+	return test_verify_socket_key(sk, &tmp);
+}
+
+static int __try_add_current_rnext_key(int sk, const char *key, uint8_t keyflags,
+				       union tcp_addr in_addr, uint8_t prefix,
+				       bool set_current, bool set_rnext,
+				       uint8_t sndid, uint8_t rcvid)
+{
+	struct tcp_ao_info_opt ao_info = {};
+	int err;
+
+	err = test_add_current_rnext_key(sk, key, keyflags, in_addr, prefix,
+					 set_current, set_rnext, sndid, rcvid);
+	if (err)
+		return err;
+
+	if (test_get_ao_info(sk, &ao_info))
+		test_error("getsockopt(TCP_AO_INFO) failed");
+	if (set_current && ao_info.current_key != sndid)
+		return -ENOTRECOVERABLE;
+	if (set_rnext && ao_info.rnext != rcvid)
+		return -ENOTRECOVERABLE;
+	return 0;
+}
+
+static void try_add_current_rnext_key(char *tst_name, int sk, const char *key,
+				     uint8_t keyflags,
+				     union tcp_addr in_addr, uint8_t prefix,
+				     bool set_current, bool set_rnext,
+				     uint8_t sndid, uint8_t rcvid, fault_t inj)
+{
+	int err;
+
+	err = __try_add_current_rnext_key(sk, key, keyflags, in_addr, prefix,
+					  set_current, set_rnext, sndid, rcvid);
+	if (!err && !fault(CURRNEXT)) {
+		test_ok("%s", tst_name);
+		return;
+	}
+	if (err == -EINVAL && fault(CURRNEXT)) {
+		test_ok("%s", tst_name);
+		return;
+	}
+	test_fail("%s", tst_name);
+}
+
+static void check_closed_socket(void)
+{
+	int sk;
+
+	sk = prepare_sk(&this_ip_dest, 200, 200);
+	try_delete_key("closed socket, delete a key", sk, 200, 200, 0, -1, -1, 0);
+	try_delete_key("closed socket, delete all keys", sk, 100, 100, 0, -1, -1, 0);
+	close(sk);
+
+	sk = prepare_sk(&this_ip_dest, 200, 200);
+	if (test_set_key(sk, 100, 200))
+		test_error("failed to set current/rnext keys");
+	try_delete_key("closed socket, delete current key", sk, 100, 100, 0, -1, -1, FAULT_BUSY);
+	try_delete_key("closed socket, delete rnext key", sk, 200, 200, 0, -1, -1, FAULT_BUSY);
+	close(sk);
+
+	sk = prepare_sk(&this_ip_dest, 200, 200);
+	if (test_add_key(sk, "Glory to heros!", this_ip_dest,
+			 DEFAULT_TEST_PREFIX, 10, 11))
+		test_error("test_add_key()");
+	if (test_add_key(sk, "Glory to Ukraine!", this_ip_dest,
+			 DEFAULT_TEST_PREFIX, 12, 13))
+		test_error("test_add_key()");
+	try_delete_key("closed socket, delete a key + set current/rnext", sk, 100, 100, 0, 10, 13, 0);
+	try_delete_key("closed socket, force-delete current key", sk, 10, 11, 0, 200, -1, 0);
+	try_delete_key("closed socket, force-delete rnext key", sk, 12, 13, 0, -1, 200, 0);
+	try_delete_key("closed socket, delete current+rnext key", sk, 200, 200, 0, -1, -1, FAULT_BUSY);
+	close(sk);
+
+	sk = prepare_sk(&this_ip_dest, 200, 200);
+	if (test_set_key(sk, 100, 200))
+		test_error("failed to set current/rnext keys");
+	try_add_current_rnext_key("closed socket, add + change current key",
+				  sk, "Laaaa! Lalala-la-la-lalala...", 0,
+				  this_ip_dest, DEFAULT_TEST_PREFIX,
+				  true, false, 10, 20, 0);
+	try_add_current_rnext_key("closed socket, add + change rnext key",
+				  sk, "Laaaa! Lalala-la-la-lalala...", 0,
+				  this_ip_dest, DEFAULT_TEST_PREFIX,
+				  false, true, 20, 10, 0);
+	close(sk);
+}
+
+static void assert_no_current_rnext(const char *tst_msg, int sk)
+{
+	struct tcp_ao_info_opt ao_info = {};
+
+	if (test_get_ao_info(sk, &ao_info))
+		test_error("getsockopt(TCP_AO_INFO) failed");
+
+	errno = 0;
+	if (ao_info.set_current || ao_info.set_rnext) {
+		test_xfail("%s: the socket has current/rnext keys: %d:%d",
+			   tst_msg,
+			   (ao_info.set_current) ? ao_info.current_key : -1,
+			   (ao_info.set_rnext) ? ao_info.rnext : -1);
+	} else {
+		test_ok("%s: the socket has no current/rnext keys", tst_msg);
+	}
+}
+
+static void assert_no_tcp_repair(void)
+{
+	struct tcp_ao_repair ao_img = {};
+	socklen_t len = sizeof(ao_img);
+	int sk, err;
+
+	sk = prepare_sk(&this_ip_dest, 200, 200);
+	test_enable_repair(sk);
+	if (listen(sk, 10))
+		test_error("listen()");
+	errno = 0;
+	err = getsockopt(sk, SOL_TCP, TCP_AO_REPAIR, &ao_img, &len);
+	if (err && errno == EPERM)
+		test_ok("listen socket, getsockopt(TCP_AO_REPAIR) is restricted");
+	else
+		test_fail("listen socket, getsockopt(TCP_AO_REPAIR) works");
+	errno = 0;
+	err = setsockopt(sk, SOL_TCP, TCP_AO_REPAIR, &ao_img, sizeof(ao_img));
+	if (err && errno == EPERM)
+		test_ok("listen socket, setsockopt(TCP_AO_REPAIR) is restricted");
+	else
+		test_fail("listen socket, setsockopt(TCP_AO_REPAIR) works");
+	close(sk);
+}
+
+static void check_listen_socket(void)
+{
+	int sk, err;
+
+	sk = prepare_lsk(&this_ip_dest, 200, 200);
+	try_delete_key("listen socket, delete a key", sk, 200, 200, 0, -1, -1, 0);
+	try_delete_key("listen socket, delete all keys", sk, 100, 100, 0, -1, -1, 0);
+	close(sk);
+
+	sk = prepare_lsk(&this_ip_dest, 200, 200);
+	err = test_set_key(sk, 100, -1);
+	if (err == -EINVAL)
+		test_ok("listen socket, setting current key not allowed");
+	else
+		test_fail("listen socket, set current key");
+	err = test_set_key(sk, -1, 200);
+	if (err == -EINVAL)
+		test_ok("listen socket, setting rnext key not allowed");
+	else
+		test_fail("listen socket, set rnext key");
+	close(sk);
+
+	sk = prepare_sk(&this_ip_dest, 200, 200);
+	if (test_set_key(sk, 100, 200))
+		test_error("failed to set current/rnext keys");
+	if (listen(sk, 10))
+		test_error("listen()");
+	assert_no_current_rnext("listen() after current/rnext keys set", sk);
+	try_delete_key("listen socket, delete current key from before listen()", sk, 100, 100, 0, -1, -1, FAULT_FIXME);
+	try_delete_key("listen socket, delete rnext key from before listen()", sk, 200, 200, 0, -1, -1, FAULT_FIXME);
+	close(sk);
+
+	assert_no_tcp_repair();
+
+	sk = prepare_lsk(&this_ip_dest, 200, 200);
+	if (test_add_key(sk, "Glory to heros!", this_ip_dest,
+			 DEFAULT_TEST_PREFIX, 10, 11))
+		test_error("test_add_key()");
+	if (test_add_key(sk, "Glory to Ukraine!", this_ip_dest,
+			 DEFAULT_TEST_PREFIX, 12, 13))
+		test_error("test_add_key()");
+	try_delete_key("listen socket, delete a key + set current/rnext", sk,
+		       100, 100, 0, 10, 13, FAULT_CURRNEXT);
+	try_delete_key("listen socket, force-delete current key", sk,
+		       10, 11, 0, 200, -1, FAULT_CURRNEXT);
+	try_delete_key("listen socket, force-delete rnext key", sk,
+		       12, 13, 0, -1, 200, FAULT_CURRNEXT);
+	try_delete_key("listen socket, delete a key", sk,
+		       200, 200, 0, -1, -1, 0);
+	close(sk);
+
+	sk = prepare_lsk(&this_ip_dest, 200, 200);
+	try_add_current_rnext_key("listen socket, add + change current key",
+				  sk, "Laaaa! Lalala-la-la-lalala...", 0,
+				  this_ip_dest, DEFAULT_TEST_PREFIX,
+				  true, false, 10, 20, FAULT_CURRNEXT);
+	try_add_current_rnext_key("listen socket, add + change rnext key",
+				  sk, "Laaaa! Lalala-la-la-lalala...", 0,
+				  this_ip_dest, DEFAULT_TEST_PREFIX,
+				  false, true, 20, 10, FAULT_CURRNEXT);
+	close(sk);
+}
+
+static const char *fips_fpath = "/proc/sys/crypto/fips_enabled";
+static bool is_fips_enabled(void)
+{
+	static int fips_checked = -1;
+	FILE *fenabled;
+	int enabled;
+
+	if (fips_checked >= 0)
+		return !!fips_checked;
+	if (access(fips_fpath, R_OK)) {
+		if (errno != ENOENT)
+			test_error("Can't open %s", fips_fpath);
+		fips_checked = 0;
+		return false;
+	}
+	fenabled = fopen(fips_fpath, "r");
+	if (!fenabled)
+		test_error("Can't open %s", fips_fpath);
+	if (fscanf(fenabled, "%d", &enabled) != 1)
+		test_error("Can't read from %s", fips_fpath);
+	fclose(fenabled);
+	fips_checked = !!enabled;
+	return !!fips_checked;
+}
+
+struct test_key {
+	char password[TCP_AO_MAXKEYLEN];
+	const char *alg;
+	unsigned int len;
+	uint8_t client_keyid;
+	uint8_t server_keyid;
+	uint8_t maclen;
+	uint8_t matches_client		: 1,
+		matches_server		: 1,
+		matches_vrf		: 1,
+		is_current		: 1,
+		is_rnext		: 1,
+		used_on_server_tx	: 1,
+		used_on_client_tx	: 1,
+		skip_counters_checks	: 1;
+};
+
+struct key_collection {
+	unsigned int nr_keys;
+	struct test_key *keys;
+};
+
+static struct key_collection collection;
+
+#define TEST_MAX_MACLEN		16
+const char *test_algos[] = {
+	"cmac(aes128)",
+	"hmac(sha1)", "hmac(sha512)", "hmac(sha384)", "hmac(sha256)",
+	"hmac(sha224)", "hmac(sha3-512)",
+	/* only if !CONFIG_FIPS */
+#define TEST_NON_FIPS_ALGOS	2
+	"hmac(rmd160)", "hmac(md5)"
+};
+const unsigned int test_maclens[] = { 1, 4, 12, 16 };
+#define MACLEN_SHIFT		2
+#define ALGOS_SHIFT		4
+
+static unsigned int make_mask(unsigned int shift, unsigned int prev_shift)
+{
+	unsigned int ret = BIT(shift) - 1;
+
+	return ret << prev_shift;
+}
+
+static void init_key_in_collection(unsigned int index, bool randomized)
+{
+	struct test_key *key = &collection.keys[index];
+	unsigned int algos_nr, algos_index;
+
+	/* Same for randomized and non-randomized test flows */
+	key->client_keyid = index;
+	key->server_keyid = 127 + index;
+	key->matches_client = 1;
+	key->matches_server = 1;
+	key->matches_vrf = 1;
+	/* not really even random, but good enough for a test */
+	key->len = rand() % (TCP_AO_MAXKEYLEN - TEST_TCP_AO_MINKEYLEN);
+	key->len += TEST_TCP_AO_MINKEYLEN;
+	randomize_buffer(key->password, key->len);
+
+	if (randomized) {
+		key->maclen = (rand() % TEST_MAX_MACLEN) + 1;
+		algos_index = rand();
+	} else {
+		unsigned int shift = MACLEN_SHIFT;
+
+		key->maclen = test_maclens[index & make_mask(shift, 0)];
+		algos_index = index & make_mask(ALGOS_SHIFT, shift);
+	}
+	algos_nr = ARRAY_SIZE(test_algos);
+	if (is_fips_enabled())
+		algos_nr -= TEST_NON_FIPS_ALGOS;
+	key->alg = test_algos[algos_index % algos_nr];
+}
+
+static int init_default_key_collection(unsigned int nr_keys, bool randomized)
+{
+	size_t key_sz = sizeof(collection.keys[0]);
+
+	if (!nr_keys) {
+		free(collection.keys);
+		collection.keys = NULL;
+		return 0;
+	}
+
+	/*
+	 * All keys have uniq sndid/rcvid and sndid != rcvid in order to
+	 * check for any bugs/issues for different keyids, visible to both
+	 * peers. Keyid == 254 is unused.
+	 */
+	if (nr_keys > 127)
+		test_error("Test requires too many keys, correct the source");
+
+	collection.keys = reallocarray(collection.keys, nr_keys, key_sz);
+	if (!collection.keys)
+		return -ENOMEM;
+
+	memset(collection.keys, 0, nr_keys * key_sz);
+	collection.nr_keys = nr_keys;
+	while (nr_keys--)
+		init_key_in_collection(nr_keys, randomized);
+
+	return 0;
+}
+
+static void test_key_error(const char *msg, struct test_key *key)
+{
+	test_error("%s: key: { %s, %u:%u, %u, %u:%u:%u:%u:%u (%u)}",
+		   msg, key->alg, key->client_keyid, key->server_keyid,
+		   key->maclen, key->matches_client, key->matches_server,
+		   key->matches_vrf, key->is_current, key->is_rnext, key->len);
+}
+
+static int test_add_key_cr(int sk, const char *pwd, unsigned int pwd_len,
+			   union tcp_addr addr, uint8_t vrf,
+			   uint8_t sndid, uint8_t rcvid,
+			   uint8_t maclen, const char *alg,
+			   bool set_current, bool set_rnext)
+{
+	struct tcp_ao_add tmp = {};
+	uint8_t keyflags = 0;
+	int err;
+
+	if (!alg)
+		alg = DEFAULT_TEST_ALGO;
+
+	if (vrf)
+		keyflags |= TCP_AO_KEYF_IFINDEX;
+	err = test_prepare_key(&tmp, alg, addr, set_current, set_rnext,
+			       DEFAULT_TEST_PREFIX, vrf, sndid, rcvid, maclen,
+			       keyflags, pwd_len, pwd);
+	if (err)
+		return err;
+
+	err = setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp));
+	if (err < 0)
+		return -errno;
+
+	return test_verify_socket_key(sk, &tmp);
+}
+
+static void verify_current_rnext(const char *tst, int sk,
+				 int current_keyid, int rnext_keyid)
+{
+	struct tcp_ao_info_opt ao_info = {};
+
+	if (test_get_ao_info(sk, &ao_info))
+		test_error("getsockopt(TCP_AO_INFO) failed");
+
+	errno = 0;
+	if (current_keyid >= 0) {
+		if (!ao_info.set_current)
+			test_fail("%s: the socket doesn't have current key", tst);
+		else if (ao_info.current_key != current_keyid)
+			test_fail("%s: current key is not the expected one %d != %u",
+				  tst, current_keyid, ao_info.current_key);
+		else
+			test_ok("%s: current key %u as expected",
+				tst, ao_info.current_key);
+	}
+	if (rnext_keyid >= 0) {
+		if (!ao_info.set_rnext)
+			test_fail("%s: the socket doesn't have rnext key", tst);
+		else if (ao_info.rnext != rnext_keyid)
+			test_fail("%s: rnext key is not the expected one %d != %u",
+				  tst, rnext_keyid, ao_info.rnext);
+		else
+			test_ok("%s: rnext key %u as expected", tst, ao_info.rnext);
+	}
+}
+
+
+static int key_collection_socket(bool server, unsigned int port)
+{
+	unsigned int i;
+	int sk;
+
+	if (server)
+		sk = test_listen_socket(this_ip_addr, port, 1);
+	else
+		sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+	if (sk < 0)
+		test_error("socket()");
+
+	for (i = 0; i < collection.nr_keys; i++) {
+		struct test_key *key = &collection.keys[i];
+		union tcp_addr *addr = &wrong_addr;
+		uint8_t sndid, rcvid, vrf;
+		bool set_current = false, set_rnext = false;
+
+		if (key->matches_vrf)
+			vrf = 0;
+		else
+			vrf = test_vrf_ifindex;
+		if (server) {
+			if (key->matches_client)
+				addr = &this_ip_dest;
+			sndid = key->server_keyid;
+			rcvid = key->client_keyid;
+		} else {
+			if (key->matches_server)
+				addr = &this_ip_dest;
+			sndid = key->client_keyid;
+			rcvid = key->server_keyid;
+			key->used_on_client_tx = set_current = key->is_current;
+			key->used_on_server_tx = set_rnext = key->is_rnext;
+		}
+
+		if (test_add_key_cr(sk, key->password, key->len,
+				    *addr, vrf, sndid, rcvid, key->maclen,
+				    key->alg, set_current, set_rnext))
+			test_key_error("setsockopt(TCP_AO_ADD_KEY)", key);
+#ifdef DEBUG
+		test_print("%s [%u/%u] key: { %s, %u:%u, %u, %u:%u:%u:%u (%u)}",
+			   server ? "server" : "client", i, collection.nr_keys,
+			   key->alg, rcvid, sndid, key->maclen,
+			   key->matches_client, key->matches_server,
+			   key->is_current, key->is_rnext, key->len);
+#endif
+	}
+	return sk;
+}
+
+static void verify_counters(const char *tst_name, bool is_listen_sk, bool server,
+			    struct tcp_ao_counters *a, struct tcp_ao_counters *b)
+{
+	unsigned int i;
+
+	__test_tcp_ao_counters_cmp(tst_name, a, b, TEST_CNT_GOOD);
+
+	for (i = 0; i < collection.nr_keys; i++) {
+		struct test_key *key = &collection.keys[i];
+		uint8_t sndid, rcvid;
+		bool rx_cnt_expected;
+
+		if (key->skip_counters_checks)
+			continue;
+		if (server) {
+			sndid = key->server_keyid;
+			rcvid = key->client_keyid;
+			rx_cnt_expected = key->used_on_client_tx;
+		} else {
+			sndid = key->client_keyid;
+			rcvid = key->server_keyid;
+			rx_cnt_expected = key->used_on_server_tx;
+		}
+
+		test_tcp_ao_key_counters_cmp(tst_name, a, b,
+					     rx_cnt_expected ? TEST_CNT_KEY_GOOD : 0,
+					     sndid, rcvid);
+	}
+	test_tcp_ao_counters_free(a);
+	test_tcp_ao_counters_free(b);
+	test_ok("%s: passed counters checks", tst_name);
+}
+
+static struct tcp_ao_getsockopt *lookup_key(struct tcp_ao_getsockopt *buf,
+					    size_t len, int sndid, int rcvid)
+{
+	size_t i;
+
+	for (i = 0; i < len; i++) {
+		if (sndid >= 0 && buf[i].sndid != sndid)
+			continue;
+		if (rcvid >= 0 && buf[i].rcvid != rcvid)
+			continue;
+		return &buf[i];
+	}
+	return NULL;
+}
+
+static void verify_keys(const char *tst_name, int sk,
+			bool is_listen_sk, bool server)
+{
+	socklen_t len = sizeof(struct tcp_ao_getsockopt);
+	struct tcp_ao_getsockopt *keys;
+	bool passed_test = true;
+	unsigned int i;
+
+	keys = calloc(collection.nr_keys, len);
+	if (!keys)
+		test_error("calloc()");
+
+	keys->nkeys = collection.nr_keys;
+	keys->get_all = 1;
+
+	if (getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS, keys, &len)) {
+		free(keys);
+		test_error("getsockopt(TCP_AO_GET_KEYS)");
+	}
+
+	for (i = 0; i < collection.nr_keys; i++) {
+		struct test_key *key = &collection.keys[i];
+		struct tcp_ao_getsockopt *dump_key;
+		bool is_kdf_aes_128_cmac = false;
+		bool is_cmac_aes = false;
+		uint8_t sndid, rcvid;
+		bool matches = false;
+
+		if (server) {
+			if (key->matches_client)
+				matches = true;
+			sndid = key->server_keyid;
+			rcvid = key->client_keyid;
+		} else {
+			if (key->matches_server)
+				matches = true;
+			sndid = key->client_keyid;
+			rcvid = key->server_keyid;
+		}
+		if (!key->matches_vrf)
+			matches = false;
+		/* no keys get removed on the original listener socket */
+		if (is_listen_sk)
+			matches = true;
+
+		dump_key = lookup_key(keys, keys->nkeys, sndid, rcvid);
+		if (matches != !!dump_key) {
+			test_fail("%s: key %u:%u %s%s on the socket",
+				  tst_name, sndid, rcvid,
+				  key->matches_vrf ? "" : "[vrf] ",
+				  matches ? "disappeared" : "yet present");
+			passed_test = false;
+			goto out;
+		}
+		if (!dump_key)
+			continue;
+
+		if (!strcmp("cmac(aes128)", key->alg)) {
+			is_kdf_aes_128_cmac = (key->len != 16);
+			is_cmac_aes = true;
+		}
+
+		if (is_cmac_aes) {
+			if (strcmp(dump_key->alg_name, "cmac(aes)")) {
+				test_fail("%s: key %u:%u cmac(aes) has unexpected alg %s",
+					  tst_name, sndid, rcvid,
+					  dump_key->alg_name);
+				passed_test = false;
+				continue;
+			}
+		} else if (strcmp(dump_key->alg_name, key->alg)) {
+			test_fail("%s: key %u:%u has unexpected alg %s != %s",
+				  tst_name, sndid, rcvid,
+				  dump_key->alg_name, key->alg);
+			passed_test = false;
+			continue;
+		}
+		if (is_kdf_aes_128_cmac) {
+			if (dump_key->keylen != 16) {
+				test_fail("%s: key %u:%u cmac(aes128) has unexpected len %u",
+					  tst_name, sndid, rcvid,
+					  dump_key->keylen);
+				continue;
+			}
+		} else if (dump_key->keylen != key->len) {
+			test_fail("%s: key %u:%u changed password len %u != %u",
+				  tst_name, sndid, rcvid,
+				  dump_key->keylen, key->len);
+			passed_test = false;
+			continue;
+		}
+		if (!is_kdf_aes_128_cmac &&
+		    memcmp(dump_key->key, key->password, key->len)) {
+			test_fail("%s: key %u:%u has different password",
+				  tst_name, sndid, rcvid);
+			passed_test = false;
+			continue;
+		}
+		if (dump_key->maclen != key->maclen) {
+			test_fail("%s: key %u:%u changed maclen %u != %u",
+				  tst_name, sndid, rcvid,
+				  dump_key->maclen, key->maclen);
+			passed_test = false;
+			continue;
+		}
+	}
+
+	if (passed_test)
+		test_ok("%s: The socket keys are consistent with the expectations",
+			tst_name);
+out:
+	free(keys);
+}
+
+static int start_server(const char *tst_name, unsigned int port, size_t quota,
+			struct tcp_ao_counters *begin,
+			unsigned int current_index, unsigned int rnext_index)
+{
+	struct tcp_ao_counters lsk_c1, lsk_c2;
+	ssize_t bytes;
+	int sk, lsk;
+
+	synchronize_threads(); /* 1: key collection initialized */
+	lsk = key_collection_socket(true, port);
+	if (test_get_tcp_ao_counters(lsk, &lsk_c1))
+		test_error("test_get_tcp_ao_counters()");
+	synchronize_threads(); /* 2: MKTs added => connect() */
+	if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0))
+		test_error("test_wait_fd()");
+
+	sk = accept(lsk, NULL, NULL);
+	if (sk < 0)
+		test_error("accept()");
+	if (test_get_tcp_ao_counters(sk, begin))
+		test_error("test_get_tcp_ao_counters()");
+
+	synchronize_threads(); /* 3: accepted => send data */
+	if (test_get_tcp_ao_counters(lsk, &lsk_c2))
+		test_error("test_get_tcp_ao_counters()");
+	verify_keys(tst_name, lsk, true, true);
+	close(lsk);
+
+	bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC);
+	if (bytes != quota)
+		test_fail("%s: server served: %zd", tst_name, bytes);
+	else
+		test_ok("%s: server alive", tst_name);
+
+	verify_counters(tst_name, true, true, &lsk_c1, &lsk_c2);
+
+	return sk;
+}
+
+static void end_server(const char *tst_name, int sk,
+		       struct tcp_ao_counters *begin)
+{
+	struct tcp_ao_counters end;
+
+	if (test_get_tcp_ao_counters(sk, &end))
+		test_error("test_get_tcp_ao_counters()");
+	verify_keys(tst_name, sk, false, true);
+
+	synchronize_threads(); /* 4: verified => closed */
+	close(sk);
+
+	verify_counters(tst_name, false, true, begin, &end);
+	synchronize_threads(); /* 5: counters */
+}
+
+static void try_server_run(const char *tst_name, unsigned int port, size_t quota,
+			   unsigned int current_index, unsigned int rnext_index)
+{
+	struct tcp_ao_counters tmp;
+	int sk;
+
+	sk = start_server(tst_name, port, quota, &tmp,
+			  current_index, rnext_index);
+	end_server(tst_name, sk, &tmp);
+}
+
+static void server_rotations(const char *tst_name, unsigned int port,
+			     size_t quota, unsigned int rotations,
+			     unsigned int current_index, unsigned int rnext_index)
+{
+	struct tcp_ao_counters tmp;
+	unsigned int i;
+	int sk;
+
+	sk = start_server(tst_name, port, quota, &tmp,
+			  current_index, rnext_index);
+
+	for (i = current_index + 1; rotations > 0; i++, rotations--) {
+		ssize_t bytes;
+
+		if (i >= collection.nr_keys)
+			i = 0;
+		bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC);
+		if (bytes != quota) {
+			test_fail("%s: server served: %zd", tst_name, bytes);
+			return;
+		}
+		verify_current_rnext(tst_name, sk,
+				     collection.keys[i].server_keyid, -1);
+		synchronize_threads(); /* verify current/rnext */
+	}
+	end_server(tst_name, sk, &tmp);
+}
+
+static int run_client(const char *tst_name, unsigned int port,
+		      unsigned int nr_keys, int current_index, int rnext_index,
+		      struct tcp_ao_counters *before,
+		      const size_t msg_sz, const size_t msg_nr)
+{
+	int sk;
+
+	synchronize_threads(); /* 1: key collection initialized */
+	sk = key_collection_socket(false, port);
+
+	if (current_index >= 0 || rnext_index >= 0) {
+		int sndid = -1, rcvid = -1;
+
+		if (current_index >= 0)
+			sndid = collection.keys[current_index].client_keyid;
+		if (rnext_index >= 0)
+			rcvid = collection.keys[rnext_index].server_keyid;
+		if (test_set_key(sk, sndid, rcvid))
+			test_error("failed to set current/rnext keys");
+	}
+	if (before && test_get_tcp_ao_counters(sk, before))
+		test_error("test_get_tcp_ao_counters()");
+
+	synchronize_threads(); /* 2: MKTs added => connect() */
+	if (test_connect_socket(sk, this_ip_dest, port++) <= 0)
+		test_error("failed to connect()");
+	if (current_index < 0)
+		current_index = nr_keys - 1;
+	if (rnext_index < 0)
+		rnext_index = nr_keys - 1;
+	collection.keys[current_index].used_on_client_tx = 1;
+	collection.keys[rnext_index].used_on_server_tx = 1;
+
+	synchronize_threads(); /* 3: accepted => send data */
+	if (test_client_verify(sk, msg_sz, msg_nr, TEST_TIMEOUT_SEC)) {
+		test_fail("verify failed");
+		close(sk);
+		if (before)
+			test_tcp_ao_counters_free(before);
+		return -1;
+	}
+
+	return sk;
+}
+
+static int start_client(const char *tst_name, unsigned int port,
+			unsigned int nr_keys, int current_index, int rnext_index,
+			struct tcp_ao_counters *before,
+			const size_t msg_sz, const size_t msg_nr)
+{
+	if (init_default_key_collection(nr_keys, true))
+		test_error("Failed to init the key collection");
+
+	return run_client(tst_name, port, nr_keys, current_index,
+			  rnext_index, before, msg_sz, msg_nr);
+}
+
+static void end_client(const char *tst_name, int sk, unsigned int nr_keys,
+		       int current_index, int rnext_index,
+		       struct tcp_ao_counters *start)
+{
+	struct tcp_ao_counters end;
+
+	/* Some application may become dependent on this kernel choice */
+	if (current_index < 0)
+		current_index = nr_keys - 1;
+	if (rnext_index < 0)
+		rnext_index = nr_keys - 1;
+	verify_current_rnext(tst_name, sk,
+			     collection.keys[current_index].client_keyid,
+			     collection.keys[rnext_index].server_keyid);
+	if (start && test_get_tcp_ao_counters(sk, &end))
+		test_error("test_get_tcp_ao_counters()");
+	verify_keys(tst_name, sk, false, false);
+	synchronize_threads(); /* 4: verify => closed */
+	close(sk);
+	if (start)
+		verify_counters(tst_name, false, false, start, &end);
+	synchronize_threads(); /* 5: counters */
+}
+
+static void try_unmatched_keys(int sk, int *rnext_index)
+{
+	struct test_key *key;
+	unsigned int i = 0;
+	int err;
+
+	do {
+		key = &collection.keys[i];
+		if (!key->matches_server)
+			break;
+	} while (++i < collection.nr_keys);
+	if (key->matches_server)
+		test_error("all keys on client match the server");
+
+	err = test_add_key_cr(sk, key->password, key->len, wrong_addr,
+			      0, key->client_keyid, key->server_keyid,
+			      key->maclen, key->alg, 0, 0);
+	if (!err) {
+		test_fail("Added a key with non-matching ip-address for established sk");
+		return;
+	}
+	if (err == -EINVAL)
+		test_ok("Can't add a key with non-matching ip-address for established sk");
+	else
+		test_error("Failed to add a key");
+
+	err = test_add_key_cr(sk, key->password, key->len, this_ip_dest,
+			      test_vrf_ifindex,
+			      key->client_keyid, key->server_keyid,
+			      key->maclen, key->alg, 0, 0);
+	if (!err) {
+		test_fail("Added a key with non-matching VRF for established sk");
+		return;
+	}
+	if (err == -EINVAL)
+		test_ok("Can't add a key with non-matching VRF for established sk");
+	else
+		test_error("Failed to add a key");
+
+	for (i = 0; i < collection.nr_keys; i++) {
+		key = &collection.keys[i];
+		if (!key->matches_client)
+			break;
+	}
+	if (key->matches_client)
+		test_error("all keys on server match the client");
+	if (test_set_key(sk, -1, key->server_keyid))
+		test_error("Can't change the current key");
+	if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC))
+		test_fail("verify failed");
+	*rnext_index = i;
+}
+
+static int client_non_matching(const char *tst_name, unsigned int port,
+			       unsigned int nr_keys,
+			       int current_index, int rnext_index,
+			       const size_t msg_sz, const size_t msg_nr)
+{
+	unsigned int i;
+
+	if (init_default_key_collection(nr_keys, true))
+		test_error("Failed to init the key collection");
+
+	for (i = 0; i < nr_keys; i++) {
+		/* key (0, 0) matches */
+		collection.keys[i].matches_client = !!((i + 3) % 4);
+		collection.keys[i].matches_server = !!((i + 2) % 4);
+		if (kernel_config_has(KCONFIG_NET_VRF))
+			collection.keys[i].matches_vrf = !!((i + 1) % 4);
+	}
+
+	return run_client(tst_name, port, nr_keys, current_index,
+			  rnext_index, NULL, msg_sz, msg_nr);
+}
+
+static void check_current_back(const char *tst_name, unsigned int port,
+			       unsigned int nr_keys,
+			       unsigned int current_index, unsigned int rnext_index,
+			       unsigned int rotate_to_index)
+{
+	struct tcp_ao_counters tmp;
+	int sk;
+
+	sk = start_client(tst_name, port, nr_keys, current_index, rnext_index,
+			  &tmp, msg_len, nr_packets);
+	if (sk < 0)
+		return;
+	if (test_set_key(sk, collection.keys[rotate_to_index].client_keyid, -1))
+		test_error("Can't change the current key");
+	if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC))
+		test_fail("verify failed");
+	/* There is a race here: between setting the current_key with
+	 * setsockopt(TCP_AO_INFO) and starting to send some data - there
+	 * might have been a segment received with the desired
+	 * RNext_key set. In turn that would mean that the first outgoing
+	 * segment will have the desired current_key (flipped back).
+	 * Which is what the user/test wants. As it's racy, skip checking
+	 * the counters, yet check what are the resulting current/rnext
+	 * keys on both sides.
+	 */
+	collection.keys[rotate_to_index].skip_counters_checks = 1;
+
+	end_client(tst_name, sk, nr_keys, current_index, rnext_index, &tmp);
+}
+
+static void roll_over_keys(const char *tst_name, unsigned int port,
+			   unsigned int nr_keys, unsigned int rotations,
+			   unsigned int current_index, unsigned int rnext_index)
+{
+	struct tcp_ao_counters tmp;
+	unsigned int i;
+	int sk;
+
+	sk = start_client(tst_name, port, nr_keys, current_index, rnext_index,
+			  &tmp, msg_len, nr_packets);
+	if (sk < 0)
+		return;
+	for (i = rnext_index + 1; rotations > 0; i++, rotations--) {
+		if (i >= collection.nr_keys)
+			i = 0;
+		if (test_set_key(sk, -1, collection.keys[i].server_keyid))
+			test_error("Can't change the Rnext key");
+		if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) {
+			test_fail("verify failed");
+			close(sk);
+			test_tcp_ao_counters_free(&tmp);
+			return;
+		}
+		verify_current_rnext(tst_name, sk, -1,
+				     collection.keys[i].server_keyid);
+		collection.keys[i].used_on_server_tx = 1;
+		synchronize_threads(); /* verify current/rnext */
+	}
+	end_client(tst_name, sk, nr_keys, current_index, rnext_index, &tmp);
+}
+
+static void try_client_run(const char *tst_name, unsigned int port,
+			   unsigned int nr_keys, int current_index, int rnext_index)
+{
+	struct tcp_ao_counters tmp;
+	int sk;
+
+	sk = start_client(tst_name, port, nr_keys, current_index, rnext_index,
+			  &tmp, msg_len, nr_packets);
+	if (sk < 0)
+		return;
+	end_client(tst_name, sk, nr_keys, current_index, rnext_index, &tmp);
+}
+
+static void try_client_match(const char *tst_name, unsigned int port,
+			     unsigned int nr_keys,
+			     int current_index, int rnext_index)
+{
+	int sk;
+
+	sk = client_non_matching(tst_name, port, nr_keys, current_index,
+				 rnext_index, msg_len, nr_packets);
+	if (sk < 0)
+		return;
+	try_unmatched_keys(sk, &rnext_index);
+	end_client(tst_name, sk, nr_keys, current_index, rnext_index, NULL);
+}
+
+static void *server_fn(void *arg)
+{
+	unsigned int port = test_server_port;
+
+	setup_vrfs();
+	try_server_run("server: Check current/rnext keys unset before connect()",
+		       port++, quota, 19, 19);
+	try_server_run("server: Check current/rnext keys set before connect()",
+		       port++, quota, 10, 10);
+	try_server_run("server: Check current != rnext keys set before connect()",
+		       port++, quota, 5, 10);
+	try_server_run("server: Check current flapping back on peer's RnextKey request",
+		       port++, quota * 2, 5, 10);
+	server_rotations("server: Rotate over all different keys", port++,
+			 quota, 20, 0, 0);
+	try_server_run("server: Check accept() => established key matching",
+		       port++, quota * 2, 0, 0);
+
+	synchronize_threads(); /* don't race to exit: client exits */
+	return NULL;
+}
+
+static void check_established_socket(void)
+{
+	unsigned int port = test_server_port;
+
+	setup_vrfs();
+	try_client_run("client: Check current/rnext keys unset before connect()",
+		       port++, 20, -1, -1);
+	try_client_run("client: Check current/rnext keys set before connect()",
+		       port++, 20, 10, 10);
+	try_client_run("client: Check current != rnext keys set before connect()",
+		       port++, 20, 10, 5);
+	check_current_back("client: Check current flapping back on peer's RnextKey request",
+			   port++, 20, 10, 5, 2);
+	roll_over_keys("client: Rotate over all different keys", port++,
+		       20, 20, 0, 0);
+	try_client_match("client: Check connect() => established key matching",
+			 port++, 20, 0, 0);
+}
+
+static void *client_fn(void *arg)
+{
+	if (inet_pton(TEST_FAMILY, TEST_WRONG_IP, &wrong_addr) != 1)
+		test_error("Can't convert ip address %s", TEST_WRONG_IP);
+	check_closed_socket();
+	check_listen_socket();
+	check_established_socket();
+	return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+	test_init(120, server_fn, client_fn);
+	return 0;
+}
diff --git a/tools/testing/selftests/net/tcp_ao/lib/aolib.h b/tools/testing/selftests/net/tcp_ao/lib/aolib.h
new file mode 100644
index 000000000000..fbc7f6111815
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/lib/aolib.h
@@ -0,0 +1,605 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * TCP-AO selftest library. Provides helpers to unshare network
+ * namespaces, create veth, assign ip addresses, set routes,
+ * manipulate socket options, read network counter and etc.
+ * Author: Dmitry Safonov <dima@arista.com>
+ */
+#ifndef _AOLIB_H_
+#define _AOLIB_H_
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <linux/snmp.h>
+#include <linux/tcp.h>
+#include <netinet/in.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include "../../../../../include/linux/stringify.h"
+#include "../../../../../include/linux/bits.h"
+
+#ifndef SOL_TCP
+/* can't include <netinet/tcp.h> as including <linux/tcp.h> */
+# define SOL_TCP		6	/* TCP level */
+#endif
+
+/* Working around ksft, see the comment in lib/setup.c */
+extern void __test_msg(const char *buf);
+extern void __test_ok(const char *buf);
+extern void __test_fail(const char *buf);
+extern void __test_xfail(const char *buf);
+extern void __test_error(const char *buf);
+extern void __test_skip(const char *buf);
+
+__attribute__((__format__(__printf__, 2, 3)))
+static inline void __test_print(void (*fn)(const char *), const char *fmt, ...)
+{
+#define TEST_MSG_BUFFER_SIZE 4096
+	char buf[TEST_MSG_BUFFER_SIZE];
+	va_list arg;
+
+	va_start(arg, fmt);
+	vsnprintf(buf, sizeof(buf), fmt, arg);
+	va_end(arg);
+	fn(buf);
+}
+
+#define test_print(fmt, ...)						\
+	__test_print(__test_msg, "%ld[%s:%u] " fmt "\n",		\
+		     syscall(SYS_gettid),				\
+		     __FILE__, __LINE__, ##__VA_ARGS__)
+
+#define test_ok(fmt, ...)						\
+	__test_print(__test_ok, fmt "\n", ##__VA_ARGS__)
+#define test_skip(fmt, ...)						\
+	__test_print(__test_skip, fmt "\n", ##__VA_ARGS__)
+#define test_xfail(fmt, ...)						\
+	__test_print(__test_xfail, fmt "\n", ##__VA_ARGS__)
+
+#define test_fail(fmt, ...)						\
+do {									\
+	if (errno)							\
+		__test_print(__test_fail, fmt ": %m\n", ##__VA_ARGS__);	\
+	else								\
+		__test_print(__test_fail, fmt "\n", ##__VA_ARGS__);	\
+	test_failed();							\
+} while (0)
+
+#define KSFT_FAIL  1
+#define test_error(fmt, ...)						\
+do {									\
+	if (errno)							\
+		__test_print(__test_error, "%ld[%s:%u] " fmt ": %m\n",	\
+			     syscall(SYS_gettid), __FILE__, __LINE__,	\
+			     ##__VA_ARGS__);				\
+	else								\
+		__test_print(__test_error, "%ld[%s:%u] " fmt "\n",	\
+			     syscall(SYS_gettid), __FILE__, __LINE__,	\
+			     ##__VA_ARGS__);				\
+	exit(KSFT_FAIL);						\
+} while (0)
+
+enum test_fault {
+	FAULT_TIMEOUT = 1,
+	FAULT_KEYREJECT,
+	FAULT_PREINSTALL_AO,
+	FAULT_PREINSTALL_MD5,
+	FAULT_POSTINSTALL,
+	FAULT_BUSY,
+	FAULT_CURRNEXT,
+	FAULT_FIXME,
+};
+typedef enum test_fault fault_t;
+
+enum test_needs_kconfig {
+	KCONFIG_NET_NS = 0,		/* required */
+	KCONFIG_VETH,			/* required */
+	KCONFIG_TCP_AO,			/* required */
+	KCONFIG_TCP_MD5,		/* optional, for TCP-MD5 features */
+	KCONFIG_NET_VRF,		/* optional, for L3/VRF testing */
+	__KCONFIG_LAST__
+};
+extern bool kernel_config_has(enum test_needs_kconfig k);
+extern const char *tests_skip_reason[__KCONFIG_LAST__];
+static inline bool should_skip_test(const char *tst_name,
+				    enum test_needs_kconfig k)
+{
+	if (kernel_config_has(k))
+		return false;
+	test_skip("%s: %s", tst_name, tests_skip_reason[k]);
+	return true;
+}
+
+union tcp_addr {
+	struct in_addr a4;
+	struct in6_addr a6;
+};
+
+typedef void *(*thread_fn)(void *);
+extern void test_failed(void);
+extern void __test_init(unsigned int ntests, int family, unsigned int prefix,
+			union tcp_addr addr1, union tcp_addr addr2,
+			thread_fn peer1, thread_fn peer2);
+
+static inline void test_init2(unsigned int ntests,
+			      thread_fn peer1, thread_fn peer2,
+			      int family, unsigned int prefix,
+			      const char *addr1, const char *addr2)
+{
+	union tcp_addr taddr1, taddr2;
+
+	if (inet_pton(family, addr1, &taddr1) != 1)
+		test_error("Can't convert ip address %s", addr1);
+	if (inet_pton(family, addr2, &taddr2) != 1)
+		test_error("Can't convert ip address %s", addr2);
+
+	__test_init(ntests, family, prefix, taddr1, taddr2, peer1, peer2);
+}
+extern void test_add_destructor(void (*d)(void));
+
+/* To adjust optmem socket limit, approximately estimate a number,
+ * that is bigger than sizeof(struct tcp_ao_key).
+ */
+#define KERNEL_TCP_AO_KEY_SZ_ROUND_UP	300
+
+extern void test_set_optmem(size_t value);
+extern size_t test_get_optmem(void);
+
+extern const struct sockaddr_in6 addr_any6;
+extern const struct sockaddr_in addr_any4;
+
+#ifdef IPV6_TEST
+# define __TEST_CLIENT_IP(n)	("2001:db8:" __stringify(n) "::1")
+# define TEST_CLIENT_IP	__TEST_CLIENT_IP(1)
+# define TEST_WRONG_IP	"2001:db8:253::1"
+# define TEST_SERVER_IP	"2001:db8:254::1"
+# define TEST_NETWORK	"2001::"
+# define TEST_PREFIX	128
+# define TEST_FAMILY	AF_INET6
+# define SOCKADDR_ANY	addr_any6
+# define sockaddr_af	struct sockaddr_in6
+#else
+# define __TEST_CLIENT_IP(n)	("10.0." __stringify(n) ".1")
+# define TEST_CLIENT_IP	__TEST_CLIENT_IP(1)
+# define TEST_WRONG_IP	"10.0.253.1"
+# define TEST_SERVER_IP	"10.0.254.1"
+# define TEST_NETWORK	"10.0.0.0"
+# define TEST_PREFIX	32
+# define TEST_FAMILY	AF_INET
+# define SOCKADDR_ANY	addr_any4
+# define sockaddr_af	struct sockaddr_in
+#endif
+
+static inline union tcp_addr gen_tcp_addr(union tcp_addr net, size_t n)
+{
+	union tcp_addr ret = net;
+
+#ifdef IPV6_TEST
+	ret.a6.s6_addr32[3] = htonl(n & (BIT(32) - 1));
+	ret.a6.s6_addr32[2] = htonl((n >> 32) & (BIT(32) - 1));
+#else
+	ret.a4.s_addr = htonl(ntohl(net.a4.s_addr) + n);
+#endif
+
+	return ret;
+}
+
+static inline void tcp_addr_to_sockaddr_in(void *dest,
+					   const union tcp_addr *src,
+					   unsigned int port)
+{
+	sockaddr_af *out = dest;
+
+	memset(out, 0, sizeof(*out));
+#ifdef IPV6_TEST
+	out->sin6_family = AF_INET6;
+	out->sin6_port   = port;
+	out->sin6_addr   = src->a6;
+#else
+	out->sin_family  = AF_INET;
+	out->sin_port    = port;
+	out->sin_addr    = src->a4;
+#endif
+}
+
+static inline void test_init(unsigned int ntests,
+			     thread_fn peer1, thread_fn peer2)
+{
+	test_init2(ntests, peer1, peer2, TEST_FAMILY, TEST_PREFIX,
+			TEST_SERVER_IP, TEST_CLIENT_IP);
+}
+extern void synchronize_threads(void);
+extern void switch_ns(int fd);
+
+extern __thread union tcp_addr this_ip_addr;
+extern __thread union tcp_addr this_ip_dest;
+extern int test_family;
+
+extern void randomize_buffer(void *buf, size_t buflen);
+extern int open_netns(void);
+extern int unshare_open_netns(void);
+extern const char veth_name[];
+extern int add_veth(const char *name, int nsfda, int nsfdb);
+extern int add_vrf(const char *name, uint32_t tabid, int ifindex, int nsfd);
+extern int ip_addr_add(const char *intf, int family,
+		       union tcp_addr addr, uint8_t prefix);
+extern int ip_route_add(const char *intf, int family,
+			union tcp_addr src, union tcp_addr dst);
+extern int ip_route_add_vrf(const char *intf, int family,
+			    union tcp_addr src, union tcp_addr dst,
+			    uint8_t vrf);
+extern int link_set_up(const char *intf);
+
+extern const unsigned int test_server_port;
+extern int test_wait_fd(int sk, time_t sec, bool write);
+extern int __test_connect_socket(int sk, const char *device,
+				 void *addr, size_t addr_sz, time_t timeout);
+extern int __test_listen_socket(int backlog, void *addr, size_t addr_sz);
+
+static inline int test_listen_socket(const union tcp_addr taddr,
+				     unsigned int port, int backlog)
+{
+	sockaddr_af addr;
+
+	tcp_addr_to_sockaddr_in(&addr, &taddr, htons(port));
+	return __test_listen_socket(backlog, (void *)&addr, sizeof(addr));
+}
+
+/*
+ * In order for selftests to work under CONFIG_CRYPTO_FIPS=y,
+ * the password should be loger than 14 bytes, see hmac_setkey()
+ */
+#define TEST_TCP_AO_MINKEYLEN	14
+#define DEFAULT_TEST_PASSWORD	"In this hour, I do not believe that any darkness will endure."
+
+#ifndef DEFAULT_TEST_ALGO
+#define DEFAULT_TEST_ALGO	"cmac(aes128)"
+#endif
+
+#ifdef IPV6_TEST
+#define DEFAULT_TEST_PREFIX	128
+#else
+#define DEFAULT_TEST_PREFIX	32
+#endif
+
+/*
+ * Timeout on syscalls where failure is not expected.
+ * You may want to rise it if the test machine is very busy.
+ */
+#ifndef TEST_TIMEOUT_SEC
+#define TEST_TIMEOUT_SEC	5
+#endif
+
+/*
+ * Timeout on connect() where a failure is expected.
+ * If set to 0 - kernel will try to retransmit SYN number of times, set in
+ * /proc/sys/net/ipv4/tcp_syn_retries
+ * By default set to 1 to make tests pass faster on non-busy machine.
+ */
+#ifndef TEST_RETRANSMIT_SEC
+#define TEST_RETRANSMIT_SEC	1
+#endif
+
+static inline int _test_connect_socket(int sk, const union tcp_addr taddr,
+				       unsigned int port, time_t timeout)
+{
+	sockaddr_af addr;
+
+	tcp_addr_to_sockaddr_in(&addr, &taddr, htons(port));
+	return __test_connect_socket(sk, veth_name,
+				     (void *)&addr, sizeof(addr), timeout);
+}
+
+static inline int test_connect_socket(int sk, const union tcp_addr taddr,
+				      unsigned int port)
+{
+	return _test_connect_socket(sk, taddr, port, TEST_TIMEOUT_SEC);
+}
+
+extern int __test_set_md5(int sk, void *addr, size_t addr_sz,
+			  uint8_t prefix, int vrf, const char *password);
+static inline int test_set_md5(int sk, const union tcp_addr in_addr,
+			       uint8_t prefix, int vrf, const char *password)
+{
+	sockaddr_af addr;
+
+	if (prefix > DEFAULT_TEST_PREFIX)
+		prefix = DEFAULT_TEST_PREFIX;
+
+	tcp_addr_to_sockaddr_in(&addr, &in_addr, 0);
+	return __test_set_md5(sk, (void *)&addr, sizeof(addr),
+			prefix, vrf, password);
+}
+
+extern int test_prepare_key_sockaddr(struct tcp_ao_add *ao, const char *alg,
+		void *addr, size_t addr_sz, bool set_current, bool set_rnext,
+		uint8_t prefix, uint8_t vrf,
+		uint8_t sndid, uint8_t rcvid, uint8_t maclen,
+		uint8_t keyflags, uint8_t keylen, const char *key);
+
+static inline int test_prepare_key(struct tcp_ao_add *ao,
+		const char *alg, union tcp_addr taddr,
+		bool set_current, bool set_rnext,
+		uint8_t prefix, uint8_t vrf,
+		uint8_t sndid, uint8_t rcvid, uint8_t maclen,
+		uint8_t keyflags, uint8_t keylen, const char *key)
+{
+	sockaddr_af addr;
+
+	tcp_addr_to_sockaddr_in(&addr, &taddr, 0);
+	return test_prepare_key_sockaddr(ao, alg, (void *)&addr, sizeof(addr),
+			set_current, set_rnext, prefix, vrf, sndid, rcvid,
+			maclen, keyflags, keylen, key);
+}
+
+static inline int test_prepare_def_key(struct tcp_ao_add *ao,
+		const char *key, uint8_t keyflags,
+		union tcp_addr in_addr, uint8_t prefix, uint8_t vrf,
+		uint8_t sndid, uint8_t rcvid)
+{
+	if (prefix > DEFAULT_TEST_PREFIX)
+		prefix = DEFAULT_TEST_PREFIX;
+
+	return test_prepare_key(ao, DEFAULT_TEST_ALGO, in_addr, false, false,
+				prefix, vrf, sndid, rcvid, 0, keyflags,
+				strlen(key), key);
+}
+
+extern int test_get_one_ao(int sk, struct tcp_ao_getsockopt *out,
+			   void *addr, size_t addr_sz,
+			   uint8_t prefix, uint8_t sndid, uint8_t rcvid);
+extern int test_get_ao_info(int sk, struct tcp_ao_info_opt *out);
+extern int test_set_ao_info(int sk, struct tcp_ao_info_opt *in);
+extern int test_cmp_getsockopt_setsockopt(const struct tcp_ao_add *a,
+					  const struct tcp_ao_getsockopt *b);
+extern int test_cmp_getsockopt_setsockopt_ao(const struct tcp_ao_info_opt *a,
+					     const struct tcp_ao_info_opt *b);
+
+static inline int test_verify_socket_key(int sk, struct tcp_ao_add *key)
+{
+	struct tcp_ao_getsockopt key2 = {};
+	int err;
+
+	err = test_get_one_ao(sk, &key2, &key->addr, sizeof(key->addr),
+			      key->prefix, key->sndid, key->rcvid);
+	if (err)
+		return err;
+
+	return test_cmp_getsockopt_setsockopt(key, &key2);
+}
+
+static inline int test_add_key_vrf(int sk,
+				   const char *key, uint8_t keyflags,
+				   union tcp_addr in_addr, uint8_t prefix,
+				   uint8_t vrf, uint8_t sndid, uint8_t rcvid)
+{
+	struct tcp_ao_add tmp = {};
+	int err;
+
+	err = test_prepare_def_key(&tmp, key, keyflags, in_addr, prefix,
+				   vrf, sndid, rcvid);
+	if (err)
+		return err;
+
+	err = setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp));
+	if (err < 0)
+		return -errno;
+
+	return test_verify_socket_key(sk, &tmp);
+}
+
+static inline int test_add_key(int sk, const char *key,
+			      union tcp_addr in_addr, uint8_t prefix,
+			      uint8_t sndid, uint8_t rcvid)
+{
+	return test_add_key_vrf(sk, key, 0, in_addr, prefix, 0, sndid, rcvid);
+}
+
+static inline int test_verify_socket_ao(int sk, struct tcp_ao_info_opt *ao)
+{
+	struct tcp_ao_info_opt ao2 = {};
+	int err;
+
+	err = test_get_ao_info(sk, &ao2);
+	if (err)
+		return err;
+
+	return test_cmp_getsockopt_setsockopt_ao(ao, &ao2);
+}
+
+static inline int test_set_ao_flags(int sk, bool ao_required, bool accept_icmps)
+{
+	struct tcp_ao_info_opt ao = {};
+	int err;
+
+	err = test_get_ao_info(sk, &ao);
+	/* Maybe ao_info wasn't allocated yet */
+	if (err && err != -ENOENT)
+		return err;
+
+	ao.ao_required = !!ao_required;
+	ao.accept_icmps = !!accept_icmps;
+	err = test_set_ao_info(sk, &ao);
+	if (err)
+		return err;
+
+	return test_verify_socket_ao(sk, &ao);
+}
+
+extern ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec);
+extern ssize_t test_client_loop(int sk, char *buf, size_t buf_sz,
+				const size_t msg_len, time_t timeout_sec);
+extern int test_client_verify(int sk, const size_t msg_len, const size_t nr,
+			      time_t timeout_sec);
+
+struct tcp_ao_key_counters {
+	uint8_t sndid;
+	uint8_t rcvid;
+	uint64_t pkt_good;
+	uint64_t pkt_bad;
+};
+
+struct tcp_ao_counters {
+	/* per-netns */
+	uint64_t netns_ao_good;
+	uint64_t netns_ao_bad;
+	uint64_t netns_ao_key_not_found;
+	uint64_t netns_ao_required;
+	uint64_t netns_ao_dropped_icmp;
+	/* per-socket */
+	uint64_t ao_info_pkt_good;
+	uint64_t ao_info_pkt_bad;
+	uint64_t ao_info_pkt_key_not_found;
+	uint64_t ao_info_pkt_ao_required;
+	uint64_t ao_info_pkt_dropped_icmp;
+	/* per-key */
+	size_t nr_keys;
+	struct tcp_ao_key_counters *key_cnts;
+};
+extern int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out);
+
+#define TEST_CNT_KEY_GOOD		BIT(0)
+#define TEST_CNT_KEY_BAD		BIT(1)
+#define TEST_CNT_SOCK_GOOD		BIT(2)
+#define TEST_CNT_SOCK_BAD		BIT(3)
+#define TEST_CNT_SOCK_KEY_NOT_FOUND	BIT(4)
+#define TEST_CNT_SOCK_AO_REQUIRED	BIT(5)
+#define TEST_CNT_SOCK_DROPPED_ICMP	BIT(6)
+#define TEST_CNT_NS_GOOD		BIT(7)
+#define TEST_CNT_NS_BAD			BIT(8)
+#define TEST_CNT_NS_KEY_NOT_FOUND	BIT(9)
+#define TEST_CNT_NS_AO_REQUIRED		BIT(10)
+#define TEST_CNT_NS_DROPPED_ICMP	BIT(11)
+typedef uint16_t test_cnt;
+
+#define TEST_CNT_AO_GOOD		(TEST_CNT_SOCK_GOOD | TEST_CNT_NS_GOOD)
+#define TEST_CNT_AO_BAD			(TEST_CNT_SOCK_BAD | TEST_CNT_NS_BAD)
+#define TEST_CNT_AO_KEY_NOT_FOUND	(TEST_CNT_SOCK_KEY_NOT_FOUND | \
+					 TEST_CNT_NS_KEY_NOT_FOUND)
+#define TEST_CNT_AO_REQUIRED		(TEST_CNT_SOCK_AO_REQUIRED | \
+					 TEST_CNT_NS_AO_REQUIRED)
+#define TEST_CNT_AO_DROPPED_ICMP	(TEST_CNT_SOCK_DROPPED_ICMP | \
+					 TEST_CNT_NS_DROPPED_ICMP)
+#define TEST_CNT_GOOD			(TEST_CNT_KEY_GOOD | TEST_CNT_AO_GOOD)
+#define TEST_CNT_BAD			(TEST_CNT_KEY_BAD | TEST_CNT_AO_BAD)
+
+extern int __test_tcp_ao_counters_cmp(const char *tst_name,
+		struct tcp_ao_counters *before, struct tcp_ao_counters *after,
+		test_cnt expected);
+extern int test_tcp_ao_key_counters_cmp(const char *tst_name,
+		struct tcp_ao_counters *before, struct tcp_ao_counters *after,
+		test_cnt expected, int sndid, int rcvid);
+extern void test_tcp_ao_counters_free(struct tcp_ao_counters *cnts);
+/*
+ * Frees buffers allocated in test_get_tcp_ao_counters().
+ * The function doesn't expect new keys or keys removed between calls
+ * to test_get_tcp_ao_counters(). Check key counters manually if they
+ * may change.
+ */
+static inline int test_tcp_ao_counters_cmp(const char *tst_name,
+					   struct tcp_ao_counters *before,
+					   struct tcp_ao_counters *after,
+					   test_cnt expected)
+{
+	int ret;
+
+	ret = __test_tcp_ao_counters_cmp(tst_name, before, after, expected);
+	if (ret)
+		goto out;
+	ret = test_tcp_ao_key_counters_cmp(tst_name, before, after,
+					   expected, -1, -1);
+out:
+	test_tcp_ao_counters_free(before);
+	test_tcp_ao_counters_free(after);
+	return ret;
+}
+
+struct netstat;
+extern struct netstat *netstat_read(void);
+extern void netstat_free(struct netstat *ns);
+extern void netstat_print_diff(struct netstat *nsa, struct netstat *nsb);
+extern uint64_t netstat_get(struct netstat *ns,
+			    const char *name, bool *not_found);
+
+static inline uint64_t netstat_get_one(const char *name, bool *not_found)
+{
+	struct netstat *ns = netstat_read();
+	uint64_t ret;
+
+	ret = netstat_get(ns, name, not_found);
+
+	netstat_free(ns);
+	return ret;
+}
+
+struct tcp_sock_queue {
+	uint32_t seq;
+	void *buf;
+};
+
+struct tcp_sock_state {
+	struct tcp_info info;
+	struct tcp_repair_window trw;
+	struct tcp_sock_queue out;
+	int outq_len;		/* output queue size (not sent + not acked) */
+	int outq_nsd_len;	/* output queue size (not sent only) */
+	struct tcp_sock_queue in;
+	int inq_len;
+	int mss;
+	int timestamp;
+};
+
+extern void __test_sock_checkpoint(int sk, struct tcp_sock_state *state,
+				   void *addr, size_t addr_size);
+static inline void test_sock_checkpoint(int sk, struct tcp_sock_state *state,
+					sockaddr_af *saddr)
+{
+	__test_sock_checkpoint(sk, state, saddr, sizeof(*saddr));
+}
+extern void test_ao_checkpoint(int sk, struct tcp_ao_repair *state);
+extern void __test_sock_restore(int sk, const char *device,
+				struct tcp_sock_state *state,
+				void *saddr, void *daddr, size_t addr_size);
+static inline void test_sock_restore(int sk, struct tcp_sock_state *state,
+				     sockaddr_af *saddr,
+				     const union tcp_addr daddr,
+				     unsigned int dport)
+{
+	sockaddr_af addr;
+
+	tcp_addr_to_sockaddr_in(&addr, &daddr, htons(dport));
+	__test_sock_restore(sk, veth_name, state, saddr, &addr, sizeof(addr));
+}
+extern void test_ao_restore(int sk, struct tcp_ao_repair *state);
+extern void test_sock_state_free(struct tcp_sock_state *state);
+extern void test_enable_repair(int sk);
+extern void test_disable_repair(int sk);
+extern void test_kill_sk(int sk);
+static inline int test_add_repaired_key(int sk,
+					const char *key, uint8_t keyflags,
+					union tcp_addr in_addr, uint8_t prefix,
+					uint8_t sndid, uint8_t rcvid)
+{
+	struct tcp_ao_add tmp = {};
+	int err;
+
+	err = test_prepare_def_key(&tmp, key, keyflags, in_addr, prefix,
+				   0, sndid, rcvid);
+	if (err)
+		return err;
+
+	tmp.set_current = 1;
+	tmp.set_rnext = 1;
+	if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp)) < 0)
+		return -errno;
+
+	return test_verify_socket_key(sk, &tmp);
+}
+
+#endif /* _AOLIB_H_ */
diff --git a/tools/testing/selftests/net/tcp_ao/lib/kconfig.c b/tools/testing/selftests/net/tcp_ao/lib/kconfig.c
new file mode 100644
index 000000000000..f279ffc3843b
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/lib/kconfig.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Check what features does the kernel support (where the selftest is running).
+ * Somewhat inspired by CRIU kerndat/kdat kernel features detector.
+ */
+#include <pthread.h>
+#include "aolib.h"
+
+struct kconfig_t {
+	int _errno;		/* the returned error if not supported */
+	int (*check_kconfig)(int *error);
+};
+
+static int has_net_ns(int *err)
+{
+	if (access("/proc/self/ns/net", F_OK) < 0) {
+		*err = errno;
+		if (errno == ENOENT)
+			return 0;
+		test_print("Unable to access /proc/self/ns/net: %m");
+		return -errno;
+	}
+	return *err = errno = 0;
+}
+
+static int has_veth(int *err)
+{
+	int orig_netns, ns_a, ns_b;
+
+	orig_netns = open_netns();
+	ns_a = unshare_open_netns();
+	ns_b = unshare_open_netns();
+
+	*err = add_veth("check_veth", ns_a, ns_b);
+
+	switch_ns(orig_netns);
+	close(orig_netns);
+	close(ns_a);
+	close(ns_b);
+	return 0;
+}
+
+static int has_tcp_ao(int *err)
+{
+	struct sockaddr_in addr = {
+		.sin_family = test_family,
+	};
+	struct tcp_ao_add tmp = {};
+	const char *password = DEFAULT_TEST_PASSWORD;
+	int sk, ret = 0;
+
+	sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+	if (sk < 0) {
+		test_print("socket(): %m");
+		return -errno;
+	}
+
+	tmp.sndid = 100;
+	tmp.rcvid = 100;
+	tmp.keylen = strlen(password);
+	memcpy(tmp.key, password, strlen(password));
+	strcpy(tmp.alg_name, "hmac(sha1)");
+	memcpy(&tmp.addr, &addr, sizeof(addr));
+	*err = 0;
+	if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp)) < 0) {
+		*err = errno;
+		if (errno != ENOPROTOOPT)
+			ret = -errno;
+	}
+	close(sk);
+	return ret;
+}
+
+static int has_tcp_md5(int *err)
+{
+	union tcp_addr addr_any = {};
+	int sk, ret = 0;
+
+	sk = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+	if (sk < 0) {
+		test_print("socket(): %m");
+		return -errno;
+	}
+
+	/*
+	 * Under CONFIG_CRYPTO_FIPS=y it fails with ENOMEM, rather with
+	 * anything more descriptive. Oh well.
+	 */
+	*err = 0;
+	if (test_set_md5(sk, addr_any, 0, -1, DEFAULT_TEST_PASSWORD)) {
+		*err = errno;
+		if (errno != ENOPROTOOPT && errno == ENOMEM) {
+			test_print("setsockopt(TCP_MD5SIG_EXT): %m");
+			ret = -errno;
+		}
+	}
+	close(sk);
+	return ret;
+}
+
+static int has_vrfs(int *err)
+{
+	int orig_netns, ns_test, ret = 0;
+
+	orig_netns = open_netns();
+	ns_test = unshare_open_netns();
+
+	*err = add_vrf("ksft-check", 55, 101, ns_test);
+	if (*err && *err != -EOPNOTSUPP) {
+		test_print("Failed to add a VRF: %d", *err);
+		ret = *err;
+	}
+
+	switch_ns(orig_netns);
+	close(orig_netns);
+	close(ns_test);
+	return ret;
+}
+
+static pthread_mutex_t kconfig_lock = PTHREAD_MUTEX_INITIALIZER;
+static struct kconfig_t kconfig[__KCONFIG_LAST__] = {
+	{ -1, has_net_ns },
+	{ -1, has_veth },
+	{ -1, has_tcp_ao },
+	{ -1, has_tcp_md5 },
+	{ -1, has_vrfs },
+};
+
+const char *tests_skip_reason[__KCONFIG_LAST__] = {
+	"Tests require network namespaces support (CONFIG_NET_NS)",
+	"Tests require veth support (CONFIG_VETH)",
+	"Tests require TCP-AO support (CONFIG_TCP_AO)",
+	"setsockopt(TCP_MD5SIG_EXT) is not supported (CONFIG_TCP_MD5)",
+	"VRFs are not supported (CONFIG_NET_VRF)",
+};
+
+bool kernel_config_has(enum test_needs_kconfig k)
+{
+	bool ret;
+
+	pthread_mutex_lock(&kconfig_lock);
+	if (kconfig[k]._errno == -1) {
+		if (kconfig[k].check_kconfig(&kconfig[k]._errno))
+			test_error("Failed to initialize kconfig %u", k);
+	}
+	ret = kconfig[k]._errno == 0;
+	pthread_mutex_unlock(&kconfig_lock);
+	return ret;
+}
diff --git a/tools/testing/selftests/net/tcp_ao/lib/netlink.c b/tools/testing/selftests/net/tcp_ao/lib/netlink.c
new file mode 100644
index 000000000000..7f108493a29a
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/lib/netlink.c
@@ -0,0 +1,413 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Original from tools/testing/selftests/net/ipsec.c */
+#include <linux/netlink.h>
+#include <linux/random.h>
+#include <linux/rtnetlink.h>
+#include <linux/veth.h>
+#include <net/if.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/socket.h>
+
+#include "aolib.h"
+
+#define MAX_PAYLOAD		2048
+
+static int netlink_sock(int *sock, uint32_t *seq_nr, int proto)
+{
+	if (*sock > 0) {
+		seq_nr++;
+		return 0;
+	}
+
+	*sock = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, proto);
+	if (*sock < 0) {
+		test_print("socket(AF_NETLINK)");
+		return -1;
+	}
+
+	randomize_buffer(seq_nr, sizeof(*seq_nr));
+
+	return 0;
+}
+
+static int netlink_check_answer(int sock, bool quite)
+{
+	struct nlmsgerror {
+		struct nlmsghdr hdr;
+		int error;
+		struct nlmsghdr orig_msg;
+	} answer;
+
+	if (recv(sock, &answer, sizeof(answer), 0) < 0) {
+		test_print("recv()");
+		return -1;
+	} else if (answer.hdr.nlmsg_type != NLMSG_ERROR) {
+		test_print("expected NLMSG_ERROR, got %d",
+			   (int)answer.hdr.nlmsg_type);
+		return -1;
+	} else if (answer.error) {
+		if (!quite) {
+			test_print("NLMSG_ERROR: %d: %s",
+				answer.error, strerror(-answer.error));
+		}
+		return answer.error;
+	}
+
+	return 0;
+}
+
+static inline struct rtattr *rtattr_hdr(struct nlmsghdr *nh)
+{
+	return (struct rtattr *)((char *)(nh) + RTA_ALIGN((nh)->nlmsg_len));
+}
+
+static int rtattr_pack(struct nlmsghdr *nh, size_t req_sz,
+		unsigned short rta_type, const void *payload, size_t size)
+{
+	/* NLMSG_ALIGNTO == RTA_ALIGNTO, nlmsg_len already aligned */
+	struct rtattr *attr = rtattr_hdr(nh);
+	size_t nl_size = RTA_ALIGN(nh->nlmsg_len) + RTA_LENGTH(size);
+
+	if (req_sz < nl_size) {
+		test_print("req buf is too small: %zu < %zu", req_sz, nl_size);
+		return -1;
+	}
+	nh->nlmsg_len = nl_size;
+
+	attr->rta_len = RTA_LENGTH(size);
+	attr->rta_type = rta_type;
+	memcpy(RTA_DATA(attr), payload, size);
+
+	return 0;
+}
+
+static struct rtattr *_rtattr_begin(struct nlmsghdr *nh, size_t req_sz,
+		unsigned short rta_type, const void *payload, size_t size)
+{
+	struct rtattr *ret = rtattr_hdr(nh);
+
+	if (rtattr_pack(nh, req_sz, rta_type, payload, size))
+		return 0;
+
+	return ret;
+}
+
+static inline struct rtattr *rtattr_begin(struct nlmsghdr *nh, size_t req_sz,
+		unsigned short rta_type)
+{
+	return _rtattr_begin(nh, req_sz, rta_type, 0, 0);
+}
+
+static inline void rtattr_end(struct nlmsghdr *nh, struct rtattr *attr)
+{
+	char *nlmsg_end = (char *)nh + nh->nlmsg_len;
+
+	attr->rta_len = nlmsg_end - (char *)attr;
+}
+
+static int veth_pack_peerb(struct nlmsghdr *nh, size_t req_sz,
+		const char *peer, int ns)
+{
+	struct ifinfomsg pi;
+	struct rtattr *peer_attr;
+
+	memset(&pi, 0, sizeof(pi));
+	pi.ifi_family	= AF_UNSPEC;
+	pi.ifi_change	= 0xFFFFFFFF;
+
+	peer_attr = _rtattr_begin(nh, req_sz, VETH_INFO_PEER, &pi, sizeof(pi));
+	if (!peer_attr)
+		return -1;
+
+	if (rtattr_pack(nh, req_sz, IFLA_IFNAME, peer, strlen(peer)))
+		return -1;
+
+	if (rtattr_pack(nh, req_sz, IFLA_NET_NS_FD, &ns, sizeof(ns)))
+		return -1;
+
+	rtattr_end(nh, peer_attr);
+
+	return 0;
+}
+
+static int __add_veth(int sock, uint32_t seq, const char *name,
+		      int ns_a, int ns_b)
+{
+	uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE;
+	struct {
+		struct nlmsghdr		nh;
+		struct ifinfomsg	info;
+		char			attrbuf[MAX_PAYLOAD];
+	} req;
+	static const char veth_type[] = "veth";
+	struct rtattr *link_info, *info_data;
+
+	memset(&req, 0, sizeof(req));
+	req.nh.nlmsg_len	= NLMSG_LENGTH(sizeof(req.info));
+	req.nh.nlmsg_type	= RTM_NEWLINK;
+	req.nh.nlmsg_flags	= flags;
+	req.nh.nlmsg_seq	= seq;
+	req.info.ifi_family	= AF_UNSPEC;
+	req.info.ifi_change	= 0xFFFFFFFF;
+
+	if (rtattr_pack(&req.nh, sizeof(req), IFLA_IFNAME, name, strlen(name)))
+		return -1;
+
+	if (rtattr_pack(&req.nh, sizeof(req), IFLA_NET_NS_FD, &ns_a, sizeof(ns_a)))
+		return -1;
+
+	link_info = rtattr_begin(&req.nh, sizeof(req), IFLA_LINKINFO);
+	if (!link_info)
+		return -1;
+
+	if (rtattr_pack(&req.nh, sizeof(req), IFLA_INFO_KIND, veth_type, sizeof(veth_type)))
+		return -1;
+
+	info_data = rtattr_begin(&req.nh, sizeof(req), IFLA_INFO_DATA);
+	if (!info_data)
+		return -1;
+
+	if (veth_pack_peerb(&req.nh, sizeof(req), name, ns_b))
+		return -1;
+
+	rtattr_end(&req.nh, info_data);
+	rtattr_end(&req.nh, link_info);
+
+	if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+		test_print("send()");
+		return -1;
+	}
+	return netlink_check_answer(sock, false);
+}
+
+int add_veth(const char *name, int nsfda, int nsfdb)
+{
+	int route_sock = -1, ret;
+	uint32_t route_seq;
+
+	if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE))
+		test_error("Failed to open netlink route socket\n");
+
+	ret = __add_veth(route_sock, route_seq++, name, nsfda, nsfdb);
+	close(route_sock);
+	return ret;
+}
+
+static int __ip_addr_add(int sock, uint32_t seq, const char *intf,
+			 int family, union tcp_addr addr, uint8_t prefix)
+{
+	uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE;
+	struct {
+		struct nlmsghdr		nh;
+		struct ifaddrmsg	info;
+		char			attrbuf[MAX_PAYLOAD];
+	} req;
+	size_t addr_len = (family == AF_INET) ? sizeof(struct in_addr) :
+						sizeof(struct in6_addr);
+
+	memset(&req, 0, sizeof(req));
+	req.nh.nlmsg_len	= NLMSG_LENGTH(sizeof(req.info));
+	req.nh.nlmsg_type	= RTM_NEWADDR;
+	req.nh.nlmsg_flags	= flags;
+	req.nh.nlmsg_seq	= seq;
+	req.info.ifa_family	= family;
+	req.info.ifa_prefixlen	= prefix;
+	req.info.ifa_index	= if_nametoindex(intf);
+	req.info.ifa_flags	= IFA_F_NODAD;
+
+	if (rtattr_pack(&req.nh, sizeof(req), IFA_LOCAL, &addr, addr_len))
+		return -1;
+
+	if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+		test_print("send()");
+		return -1;
+	}
+	return netlink_check_answer(sock, true);
+}
+
+int ip_addr_add(const char *intf, int family,
+		union tcp_addr addr, uint8_t prefix)
+{
+	int route_sock = -1, ret;
+	uint32_t route_seq;
+
+	if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE))
+		test_error("Failed to open netlink route socket\n");
+
+	ret = __ip_addr_add(route_sock, route_seq++, intf,
+			    family, addr, prefix);
+
+	close(route_sock);
+	return ret;
+}
+
+static int __ip_route_add(int sock, uint32_t seq, const char *intf, int family,
+			  union tcp_addr src, union tcp_addr dst, uint8_t vrf)
+{
+	struct {
+		struct nlmsghdr	nh;
+		struct rtmsg	rt;
+		char		attrbuf[MAX_PAYLOAD];
+	} req;
+	unsigned int index = if_nametoindex(intf);
+	size_t addr_len = (family == AF_INET) ? sizeof(struct in_addr) :
+						sizeof(struct in6_addr);
+
+	memset(&req, 0, sizeof(req));
+	req.nh.nlmsg_len	= NLMSG_LENGTH(sizeof(req.rt));
+	req.nh.nlmsg_type	= RTM_NEWROUTE;
+	req.nh.nlmsg_flags	= NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE;
+	req.nh.nlmsg_seq	= seq;
+	req.rt.rtm_family	= family;
+	req.rt.rtm_dst_len	= (family == AF_INET) ? 32 : 128;
+	req.rt.rtm_table	= vrf;
+	req.rt.rtm_protocol	= RTPROT_BOOT;
+	req.rt.rtm_scope	= RT_SCOPE_UNIVERSE;
+	req.rt.rtm_type		= RTN_UNICAST;
+
+	if (rtattr_pack(&req.nh, sizeof(req), RTA_DST, &dst, addr_len))
+		return -1;
+
+	if (rtattr_pack(&req.nh, sizeof(req), RTA_PREFSRC, &src, addr_len))
+		return -1;
+
+	if (rtattr_pack(&req.nh, sizeof(req), RTA_OIF, &index, sizeof(index)))
+		return -1;
+
+	if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+		test_print("send()");
+		return -1;
+	}
+
+	return netlink_check_answer(sock, true);
+}
+
+int ip_route_add_vrf(const char *intf, int family,
+		 union tcp_addr src, union tcp_addr dst, uint8_t vrf)
+{
+	int route_sock = -1, ret;
+	uint32_t route_seq;
+
+	if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE))
+		test_error("Failed to open netlink route socket\n");
+
+	ret = __ip_route_add(route_sock, route_seq++, intf,
+			     family, src, dst, vrf);
+
+	close(route_sock);
+	return ret;
+}
+
+int ip_route_add(const char *intf, int family,
+		 union tcp_addr src, union tcp_addr dst)
+{
+	return ip_route_add_vrf(intf, family, src, dst, RT_TABLE_MAIN);
+}
+
+static int __link_set_up(int sock, uint32_t seq, const char *intf)
+{
+	struct {
+		struct nlmsghdr		nh;
+		struct ifinfomsg	info;
+		char			attrbuf[MAX_PAYLOAD];
+	} req;
+
+	memset(&req, 0, sizeof(req));
+	req.nh.nlmsg_len	= NLMSG_LENGTH(sizeof(req.info));
+	req.nh.nlmsg_type	= RTM_NEWLINK;
+	req.nh.nlmsg_flags	= NLM_F_REQUEST | NLM_F_ACK;
+	req.nh.nlmsg_seq	= seq;
+	req.info.ifi_family	= AF_UNSPEC;
+	req.info.ifi_change	= 0xFFFFFFFF;
+	req.info.ifi_index	= if_nametoindex(intf);
+	req.info.ifi_flags	= IFF_UP;
+	req.info.ifi_change	= IFF_UP;
+
+	if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+		test_print("send()");
+		return -1;
+	}
+	return netlink_check_answer(sock, false);
+}
+
+int link_set_up(const char *intf)
+{
+	int route_sock = -1, ret;
+	uint32_t route_seq;
+
+	if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE))
+		test_error("Failed to open netlink route socket\n");
+
+	ret = __link_set_up(route_sock, route_seq++, intf);
+
+	close(route_sock);
+	return ret;
+}
+
+static int __add_vrf(int sock, uint32_t seq, const char *name,
+		     uint32_t tabid, int ifindex, int nsfd)
+{
+	uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE;
+	struct {
+		struct nlmsghdr		nh;
+		struct ifinfomsg	info;
+		char			attrbuf[MAX_PAYLOAD];
+	} req;
+	static const char vrf_type[] = "vrf";
+	struct rtattr *link_info, *info_data;
+
+	memset(&req, 0, sizeof(req));
+	req.nh.nlmsg_len	= NLMSG_LENGTH(sizeof(req.info));
+	req.nh.nlmsg_type	= RTM_NEWLINK;
+	req.nh.nlmsg_flags	= flags;
+	req.nh.nlmsg_seq	= seq;
+	req.info.ifi_family	= AF_UNSPEC;
+	req.info.ifi_change	= 0xFFFFFFFF;
+	req.info.ifi_index	= ifindex;
+
+	if (rtattr_pack(&req.nh, sizeof(req), IFLA_IFNAME, name, strlen(name)))
+		return -1;
+
+	if (nsfd >= 0)
+		if (rtattr_pack(&req.nh, sizeof(req), IFLA_NET_NS_FD,
+				&nsfd, sizeof(nsfd)))
+			return -1;
+
+	link_info = rtattr_begin(&req.nh, sizeof(req), IFLA_LINKINFO);
+	if (!link_info)
+		return -1;
+
+	if (rtattr_pack(&req.nh, sizeof(req), IFLA_INFO_KIND, vrf_type, sizeof(vrf_type)))
+		return -1;
+
+	info_data = rtattr_begin(&req.nh, sizeof(req), IFLA_INFO_DATA);
+	if (!info_data)
+		return -1;
+
+	if (rtattr_pack(&req.nh, sizeof(req), IFLA_VRF_TABLE,
+			&tabid, sizeof(tabid)))
+		return -1;
+
+	rtattr_end(&req.nh, info_data);
+	rtattr_end(&req.nh, link_info);
+
+	if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+		test_print("send()");
+		return -1;
+	}
+	return netlink_check_answer(sock, true);
+}
+
+int add_vrf(const char *name, uint32_t tabid, int ifindex, int nsfd)
+{
+	int route_sock = -1, ret;
+	uint32_t route_seq;
+
+	if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE))
+		test_error("Failed to open netlink route socket\n");
+
+	ret = __add_vrf(route_sock, route_seq++, name, tabid, ifindex, nsfd);
+	close(route_sock);
+	return ret;
+}
diff --git a/tools/testing/selftests/net/tcp_ao/lib/proc.c b/tools/testing/selftests/net/tcp_ao/lib/proc.c
new file mode 100644
index 000000000000..2fb6dd8adba6
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/lib/proc.c
@@ -0,0 +1,273 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include <pthread.h>
+#include <stdio.h>
+#include "../../../../../include/linux/compiler.h"
+#include "../../../../../include/linux/kernel.h"
+#include "aolib.h"
+
+struct netstat_counter {
+	uint64_t val;
+	char *name;
+};
+
+struct netstat {
+	char *header_name;
+	struct netstat *next;
+	size_t counters_nr;
+	struct netstat_counter *counters;
+};
+
+static struct netstat *lookup_type(struct netstat *ns,
+		const char *type, size_t len)
+{
+	while (ns != NULL) {
+		size_t cmp = max(len, strlen(ns->header_name));
+
+		if (!strncmp(ns->header_name, type, cmp))
+			return ns;
+		ns = ns->next;
+	}
+	return NULL;
+}
+
+static struct netstat *lookup_get(struct netstat *ns,
+				  const char *type, const size_t len)
+{
+	struct netstat *ret;
+
+	ret = lookup_type(ns, type, len);
+	if (ret != NULL)
+		return ret;
+
+	ret = malloc(sizeof(struct netstat));
+	if (!ret)
+		test_error("malloc()");
+
+	ret->header_name = strndup(type, len);
+	if (ret->header_name == NULL)
+		test_error("strndup()");
+	ret->next = ns;
+	ret->counters_nr = 0;
+	ret->counters = NULL;
+
+	return ret;
+}
+
+static struct netstat *lookup_get_column(struct netstat *ns, const char *line)
+{
+	char *column;
+
+	column = strchr(line, ':');
+	if (!column)
+		test_error("can't parse netstat file");
+
+	return lookup_get(ns, line, column - line);
+}
+
+static void netstat_read_type(FILE *fnetstat, struct netstat **dest, char *line)
+{
+	struct netstat *type = lookup_get_column(*dest, line);
+	const char *pos = line;
+	size_t i, nr_elems = 0;
+	char tmp;
+
+	while ((pos = strchr(pos, ' '))) {
+		nr_elems++;
+		pos++;
+	}
+
+	*dest = type;
+	type->counters = reallocarray(type->counters,
+				type->counters_nr + nr_elems,
+				sizeof(struct netstat_counter));
+	if (!type->counters)
+		test_error("reallocarray()");
+
+	pos = strchr(line, ' ') + 1;
+
+	if (fscanf(fnetstat, type->header_name) == EOF)
+		test_error("fscanf(%s)", type->header_name);
+	if (fread(&tmp, 1, 1, fnetstat) != 1 || tmp != ':')
+		test_error("Unexpected netstat format (%c)", tmp);
+
+	for (i = type->counters_nr; i < type->counters_nr + nr_elems; i++) {
+		struct netstat_counter *nc = &type->counters[i];
+		const char *new_pos = strchr(pos, ' ');
+		const char *fmt = " %" PRIu64;
+
+		if (new_pos == NULL)
+			new_pos = strchr(pos, '\n');
+
+		nc->name = strndup(pos, new_pos - pos);
+		if (nc->name == NULL)
+			test_error("strndup()");
+
+		if (unlikely(!strcmp(nc->name, "MaxConn")))
+			fmt = " %" PRId64; /* MaxConn is signed, RFC 2012 */
+		if (fscanf(fnetstat, fmt, &nc->val) != 1)
+			test_error("fscanf(%s)", nc->name);
+		pos = new_pos + 1;
+	}
+	type->counters_nr += nr_elems;
+
+	if (fread(&tmp, 1, 1, fnetstat) != 1 || tmp != '\n')
+		test_error("Unexpected netstat format");
+}
+
+static const char *snmp6_name = "Snmp6";
+static void snmp6_read(FILE *fnetstat, struct netstat **dest)
+{
+	struct netstat *type = lookup_get(*dest, snmp6_name, strlen(snmp6_name));
+	char *counter_name;
+	size_t i;
+
+	for (i = type->counters_nr;; i++) {
+		struct netstat_counter *nc;
+		uint64_t counter;
+
+		if (fscanf(fnetstat, "%ms", &counter_name) == EOF)
+			break;
+		if (fscanf(fnetstat, "%" PRIu64, &counter) == EOF)
+			test_error("Unexpected snmp6 format");
+		type->counters = reallocarray(type->counters, i + 1,
+					sizeof(struct netstat_counter));
+		if (!type->counters)
+			test_error("reallocarray()");
+		nc = &type->counters[i];
+		nc->name = counter_name;
+		nc->val = counter;
+	}
+	type->counters_nr = i;
+	*dest = type;
+}
+
+struct netstat *netstat_read(void)
+{
+	struct netstat *ret = 0;
+	size_t line_sz = 0;
+	char *line = NULL;
+	FILE *fnetstat;
+
+	/*
+	 * Opening thread-self instead of /proc/net/... as the latter
+	 * points to /proc/self/net/ which instantiates thread-leader's
+	 * net-ns, see:
+	 * commit 155134fef2b6 ("Revert "proc: Point /proc/{mounts,net} at..")
+	 */
+	errno = 0;
+	fnetstat = fopen("/proc/thread-self/net/netstat", "r");
+	if (fnetstat == NULL)
+		test_error("failed to open /proc/net/netstat");
+
+	while (getline(&line, &line_sz, fnetstat) != -1)
+		netstat_read_type(fnetstat, &ret, line);
+	fclose(fnetstat);
+
+	errno = 0;
+	fnetstat = fopen("/proc/thread-self/net/snmp", "r");
+	if (fnetstat == NULL)
+		test_error("failed to open /proc/net/snmp");
+
+	while (getline(&line, &line_sz, fnetstat) != -1)
+		netstat_read_type(fnetstat, &ret, line);
+	fclose(fnetstat);
+
+	errno = 0;
+	fnetstat = fopen("/proc/thread-self/net/snmp6", "r");
+	if (fnetstat == NULL)
+		test_error("failed to open /proc/net/snmp6");
+
+	snmp6_read(fnetstat, &ret);
+	fclose(fnetstat);
+
+	free(line);
+	return ret;
+}
+
+void netstat_free(struct netstat *ns)
+{
+	while (ns != NULL) {
+		struct netstat *prev = ns;
+		size_t i;
+
+		free(ns->header_name);
+		for (i = 0; i < ns->counters_nr; i++)
+			free(ns->counters[i].name);
+		free(ns->counters);
+		ns = ns->next;
+		free(prev);
+	}
+}
+
+static inline void
+__netstat_print_diff(uint64_t a, struct netstat *nsb, size_t i)
+{
+	if (unlikely(!strcmp(nsb->header_name, "MaxConn"))) {
+		test_print("%8s %25s: %" PRId64 " => %" PRId64,
+				nsb->header_name, nsb->counters[i].name,
+				a, nsb->counters[i].val);
+		return;
+	}
+
+	test_print("%8s %25s: %" PRIu64 " => %" PRIu64, nsb->header_name,
+			nsb->counters[i].name, a, nsb->counters[i].val);
+}
+
+void netstat_print_diff(struct netstat *nsa, struct netstat *nsb)
+{
+	size_t i, j;
+
+	while (nsb != NULL) {
+		if (unlikely(strcmp(nsb->header_name, nsa->header_name))) {
+			for (i = 0; i < nsb->counters_nr; i++)
+				__netstat_print_diff(0, nsb, i);
+			nsb = nsb->next;
+			continue;
+		}
+
+		if (nsb->counters_nr < nsa->counters_nr)
+			test_error("Unexpected: some counters disappeared!");
+
+		for (j = 0, i = 0; i < nsb->counters_nr; i++) {
+			if (strcmp(nsb->counters[i].name, nsa->counters[j].name)) {
+				__netstat_print_diff(0, nsb, i);
+				continue;
+			}
+
+			if (nsa->counters[j].val == nsb->counters[i].val) {
+				j++;
+				continue;
+			}
+
+			__netstat_print_diff(nsa->counters[j].val, nsb, i);
+			j++;
+		}
+		if (j != nsa->counters_nr)
+			test_error("Unexpected: some counters disappeared!");
+
+		nsb = nsb->next;
+		nsa = nsa->next;
+	}
+}
+
+uint64_t netstat_get(struct netstat *ns, const char *name, bool *not_found)
+{
+	if (not_found)
+		*not_found = false;
+
+	while (ns != NULL) {
+		size_t i;
+
+		for (i = 0; i < ns->counters_nr; i++) {
+			if (!strcmp(name, ns->counters[i].name))
+				return ns->counters[i].val;
+		}
+
+		ns = ns->next;
+	}
+
+	if (not_found)
+		*not_found = true;
+	return 0;
+}
diff --git a/tools/testing/selftests/net/tcp_ao/lib/repair.c b/tools/testing/selftests/net/tcp_ao/lib/repair.c
new file mode 100644
index 000000000000..9893b3ba69f5
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/lib/repair.c
@@ -0,0 +1,254 @@
+// SPDX-License-Identifier: GPL-2.0
+/* This is over-simplified TCP_REPAIR for TCP_ESTABLISHED sockets
+ * It tests that TCP-AO enabled connection can be restored.
+ * For the proper socket repair see:
+ * https://github.com/checkpoint-restore/criu/blob/criu-dev/soccr/soccr.h
+ */
+#include <fcntl.h>
+#include <linux/sockios.h>
+#include <sys/ioctl.h>
+#include "aolib.h"
+
+#ifndef TCPOPT_MAXSEG
+# define TCPOPT_MAXSEG		2
+#endif
+#ifndef TCPOPT_WINDOW
+# define TCPOPT_WINDOW		3
+#endif
+#ifndef TCPOPT_SACK_PERMITTED
+# define TCPOPT_SACK_PERMITTED	4
+#endif
+#ifndef TCPOPT_TIMESTAMP
+# define TCPOPT_TIMESTAMP	8
+#endif
+
+enum {
+	TCP_ESTABLISHED = 1,
+	TCP_SYN_SENT,
+	TCP_SYN_RECV,
+	TCP_FIN_WAIT1,
+	TCP_FIN_WAIT2,
+	TCP_TIME_WAIT,
+	TCP_CLOSE,
+	TCP_CLOSE_WAIT,
+	TCP_LAST_ACK,
+	TCP_LISTEN,
+	TCP_CLOSING,	/* Now a valid state */
+	TCP_NEW_SYN_RECV,
+
+	TCP_MAX_STATES	/* Leave at the end! */
+};
+
+static void test_sock_checkpoint_queue(int sk, int queue, int qlen,
+				       struct tcp_sock_queue *q)
+{
+	socklen_t len;
+	int ret;
+
+	if (setsockopt(sk, SOL_TCP, TCP_REPAIR_QUEUE, &queue, sizeof(queue)))
+		test_error("setsockopt(TCP_REPAIR_QUEUE)");
+
+	len = sizeof(q->seq);
+	ret = getsockopt(sk, SOL_TCP, TCP_QUEUE_SEQ, &q->seq, &len);
+	if (ret || len != sizeof(q->seq))
+		test_error("getsockopt(TCP_QUEUE_SEQ): %d", (int)len);
+
+	if (!qlen) {
+		q->buf = NULL;
+		return;
+	}
+
+	q->buf = malloc(qlen);
+	if (q->buf == NULL)
+		test_error("malloc()");
+	ret = recv(sk, q->buf, qlen, MSG_PEEK | MSG_DONTWAIT);
+	if (ret != qlen)
+		test_error("recv(%d): %d", qlen, ret);
+}
+
+void __test_sock_checkpoint(int sk, struct tcp_sock_state *state,
+			    void *addr, size_t addr_size)
+{
+	socklen_t len = sizeof(state->info);
+	int ret;
+
+	memset(state, 0, sizeof(*state));
+
+	ret = getsockopt(sk, SOL_TCP, TCP_INFO, &state->info, &len);
+	if (ret || len != sizeof(state->info))
+		test_error("getsockopt(TCP_INFO): %d", (int)len);
+
+	len = addr_size;
+	if (getsockname(sk, addr, &len) || len != addr_size)
+		test_error("getsockname(): %d", (int)len);
+
+	len = sizeof(state->trw);
+	ret = getsockopt(sk, SOL_TCP, TCP_REPAIR_WINDOW, &state->trw, &len);
+	if (ret || len != sizeof(state->trw))
+		test_error("getsockopt(TCP_REPAIR_WINDOW): %d", (int)len);
+
+	if (ioctl(sk, SIOCOUTQ, &state->outq_len))
+		test_error("ioctl(SIOCOUTQ)");
+
+	if (ioctl(sk, SIOCOUTQNSD, &state->outq_nsd_len))
+		test_error("ioctl(SIOCOUTQNSD)");
+	test_sock_checkpoint_queue(sk, TCP_SEND_QUEUE, state->outq_len, &state->out);
+
+	if (ioctl(sk, SIOCINQ, &state->inq_len))
+		test_error("ioctl(SIOCINQ)");
+	test_sock_checkpoint_queue(sk, TCP_RECV_QUEUE, state->inq_len, &state->in);
+
+	if (state->info.tcpi_state == TCP_CLOSE)
+		state->outq_len = state->outq_nsd_len = 0;
+
+	len = sizeof(state->mss);
+	ret = getsockopt(sk, SOL_TCP, TCP_MAXSEG, &state->mss, &len);
+	if (ret || len != sizeof(state->mss))
+		test_error("getsockopt(TCP_MAXSEG): %d", (int)len);
+
+	len = sizeof(state->timestamp);
+	ret = getsockopt(sk, SOL_TCP, TCP_TIMESTAMP, &state->timestamp, &len);
+	if (ret || len != sizeof(state->timestamp))
+		test_error("getsockopt(TCP_TIMESTAMP): %d", (int)len);
+}
+
+void test_ao_checkpoint(int sk, struct tcp_ao_repair *state)
+{
+	socklen_t len = sizeof(*state);
+	int ret;
+
+	memset(state, 0, sizeof(*state));
+
+	ret = getsockopt(sk, SOL_TCP, TCP_AO_REPAIR, state, &len);
+	if (ret || len != sizeof(*state))
+		test_error("getsockopt(TCP_AO_REPAIR): %d", (int)len);
+}
+
+static void test_sock_restore_seq(int sk, int queue, uint32_t seq)
+{
+	if (setsockopt(sk, SOL_TCP, TCP_REPAIR_QUEUE, &queue, sizeof(queue)))
+		test_error("setsockopt(TCP_REPAIR_QUEUE)");
+
+	if (setsockopt(sk, SOL_TCP, TCP_QUEUE_SEQ, &seq, sizeof(seq)))
+		test_error("setsockopt(TCP_QUEUE_SEQ)");
+}
+
+static void test_sock_restore_queue(int sk, int queue, void *buf, int len)
+{
+	int chunk = len;
+	size_t off = 0;
+
+	if (len == 0)
+		return;
+
+	if (setsockopt(sk, SOL_TCP, TCP_REPAIR_QUEUE, &queue, sizeof(queue)))
+		test_error("setsockopt(TCP_REPAIR_QUEUE)");
+
+	do {
+		int ret;
+
+		ret = send(sk, buf + off, chunk, 0);
+		if (ret <= 0) {
+			if (chunk > 1024) {
+				chunk >>= 1;
+				continue;
+			}
+			test_error("send()");
+		}
+		off += ret;
+		len -= ret;
+	} while (len > 0);
+}
+
+void __test_sock_restore(int sk, const char *device,
+			 struct tcp_sock_state *state,
+			 void *saddr, void *daddr, size_t addr_size)
+{
+	struct tcp_repair_opt opts[4];
+	unsigned int opt_nr = 0;
+	long flags;
+
+	if (bind(sk, saddr, addr_size))
+		test_error("bind()");
+
+	flags = fcntl(sk, F_GETFL);
+	if ((flags < 0) || (fcntl(sk, F_SETFL, flags | O_NONBLOCK) < 0))
+		test_error("fcntl()");
+
+	test_sock_restore_seq(sk, TCP_RECV_QUEUE, state->in.seq - state->inq_len);
+	test_sock_restore_seq(sk, TCP_SEND_QUEUE, state->out.seq - state->outq_len);
+
+	if (device != NULL && setsockopt(sk, SOL_SOCKET, SO_BINDTODEVICE,
+					 device, strlen(device) + 1))
+		test_error("setsockopt(SO_BINDTODEVICE, %s)", device);
+
+	if (connect(sk, daddr, addr_size))
+		test_error("connect()");
+
+	if (state->info.tcpi_options & TCPI_OPT_SACK) {
+		opts[opt_nr].opt_code = TCPOPT_SACK_PERMITTED;
+		opts[opt_nr].opt_val = 0;
+		opt_nr++;
+	}
+	if (state->info.tcpi_options & TCPI_OPT_WSCALE) {
+		opts[opt_nr].opt_code = TCPOPT_WINDOW;
+		opts[opt_nr].opt_val = state->info.tcpi_snd_wscale +
+				(state->info.tcpi_rcv_wscale << 16);
+		opt_nr++;
+	}
+	if (state->info.tcpi_options & TCPI_OPT_TIMESTAMPS) {
+		opts[opt_nr].opt_code = TCPOPT_TIMESTAMP;
+		opts[opt_nr].opt_val = 0;
+		opt_nr++;
+	}
+	opts[opt_nr].opt_code = TCPOPT_MAXSEG;
+	opts[opt_nr].opt_val = state->mss;
+	opt_nr++;
+
+	if (setsockopt(sk, SOL_TCP, TCP_REPAIR_OPTIONS, opts, opt_nr * sizeof(opts[0])))
+		test_error("setsockopt(TCP_REPAIR_OPTIONS)");
+
+	if (state->info.tcpi_options & TCPI_OPT_TIMESTAMPS) {
+		if (setsockopt(sk, SOL_TCP, TCP_TIMESTAMP,
+			       &state->timestamp, opt_nr * sizeof(opts[0])))
+			test_error("setsockopt(TCP_TIMESTAMP)");
+	}
+	test_sock_restore_queue(sk, TCP_RECV_QUEUE, state->in.buf, state->inq_len);
+	test_sock_restore_queue(sk, TCP_SEND_QUEUE, state->out.buf, state->outq_len);
+	if (setsockopt(sk, SOL_TCP, TCP_REPAIR_WINDOW, &state->trw, sizeof(state->trw)))
+		test_error("setsockopt(TCP_REPAIR_WINDOW)");
+}
+
+void test_ao_restore(int sk, struct tcp_ao_repair *state)
+{
+	if (setsockopt(sk, SOL_TCP, TCP_AO_REPAIR, state, sizeof(*state)))
+		test_error("setsockopt(TCP_AO_REPAIR)");
+}
+
+void test_sock_state_free(struct tcp_sock_state *state)
+{
+	free(state->out.buf);
+	free(state->in.buf);
+}
+
+void test_enable_repair(int sk)
+{
+	int val = TCP_REPAIR_ON;
+
+	if (setsockopt(sk, SOL_TCP, TCP_REPAIR, &val, sizeof(val)))
+		test_error("setsockopt(TCP_REPAIR)");
+}
+
+void test_disable_repair(int sk)
+{
+	int val = TCP_REPAIR_OFF_NO_WP;
+
+	if (setsockopt(sk, SOL_TCP, TCP_REPAIR, &val, sizeof(val)))
+		test_error("setsockopt(TCP_REPAIR)");
+}
+
+void test_kill_sk(int sk)
+{
+	test_enable_repair(sk);
+	close(sk);
+}
diff --git a/tools/testing/selftests/net/tcp_ao/lib/setup.c b/tools/testing/selftests/net/tcp_ao/lib/setup.c
new file mode 100644
index 000000000000..92276f916f2f
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/lib/setup.c
@@ -0,0 +1,361 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <fcntl.h>
+#include <pthread.h>
+#include <sched.h>
+#include <signal.h>
+#include "aolib.h"
+
+/*
+ * Can't be included in the header: it defines static variables which
+ * will be unique to every object. Let's include it only once here.
+ */
+#include "../../../kselftest.h"
+
+/* Prevent overriding of one thread's output by another */
+static pthread_mutex_t ksft_print_lock = PTHREAD_MUTEX_INITIALIZER;
+
+void __test_msg(const char *buf)
+{
+	pthread_mutex_lock(&ksft_print_lock);
+	ksft_print_msg(buf);
+	pthread_mutex_unlock(&ksft_print_lock);
+}
+void __test_ok(const char *buf)
+{
+	pthread_mutex_lock(&ksft_print_lock);
+	ksft_test_result_pass(buf);
+	pthread_mutex_unlock(&ksft_print_lock);
+}
+void __test_fail(const char *buf)
+{
+	pthread_mutex_lock(&ksft_print_lock);
+	ksft_test_result_fail(buf);
+	pthread_mutex_unlock(&ksft_print_lock);
+}
+void __test_xfail(const char *buf)
+{
+	pthread_mutex_lock(&ksft_print_lock);
+	ksft_test_result_xfail(buf);
+	pthread_mutex_unlock(&ksft_print_lock);
+}
+void __test_error(const char *buf)
+{
+	pthread_mutex_lock(&ksft_print_lock);
+	ksft_test_result_error(buf);
+	pthread_mutex_unlock(&ksft_print_lock);
+}
+void __test_skip(const char *buf)
+{
+	pthread_mutex_lock(&ksft_print_lock);
+	ksft_test_result_skip(buf);
+	pthread_mutex_unlock(&ksft_print_lock);
+}
+
+static volatile int failed;
+static volatile int skipped;
+
+void test_failed(void)
+{
+	failed = 1;
+}
+
+static void test_exit(void)
+{
+	if (failed) {
+		ksft_exit_fail();
+	} else if (skipped) {
+		/* ksft_exit_skip() is different from ksft_exit_*() */
+		ksft_print_cnts();
+		exit(KSFT_SKIP);
+	} else {
+		ksft_exit_pass();
+	}
+}
+
+struct dlist_t {
+	void (*destruct)(void);
+	struct dlist_t *next;
+};
+static struct dlist_t *destructors_list;
+
+void test_add_destructor(void (*d)(void))
+{
+	struct dlist_t *p;
+
+	p = malloc(sizeof(struct dlist_t));
+	if (p == NULL)
+		test_error("malloc() failed");
+
+	p->next = destructors_list;
+	p->destruct = d;
+	destructors_list = p;
+}
+
+static void test_destructor(void) __attribute__((destructor));
+static void test_destructor(void)
+{
+	while (destructors_list) {
+		struct dlist_t *p = destructors_list->next;
+
+		destructors_list->destruct();
+		free(destructors_list);
+		destructors_list = p;
+	}
+	test_exit();
+}
+
+static void sig_int(int signo)
+{
+	test_error("Caught SIGINT - exiting");
+}
+
+int open_netns(void)
+{
+	const char *netns_path = "/proc/self/ns/net";
+	int fd;
+
+	fd = open(netns_path, O_RDONLY);
+	if (fd < 0)
+		test_error("open(%s)", netns_path);
+	return fd;
+}
+
+int unshare_open_netns(void)
+{
+	if (unshare(CLONE_NEWNET) != 0)
+		test_error("unshare()");
+
+	return open_netns();
+}
+
+void switch_ns(int fd)
+{
+	if (setns(fd, CLONE_NEWNET))
+		test_error("setns()");
+}
+
+int switch_save_ns(int new_ns)
+{
+	int ret = open_netns();
+
+	switch_ns(new_ns);
+	return ret;
+}
+
+static int nsfd_outside	= -1;
+static int nsfd_parent	= -1;
+static int nsfd_child	= -1;
+const char veth_name[]	= "ktst-veth";
+
+static void init_namespaces(void)
+{
+	nsfd_outside = open_netns();
+	nsfd_parent = unshare_open_netns();
+	nsfd_child = unshare_open_netns();
+}
+
+static void link_init(const char *veth, int family, uint8_t prefix,
+		      union tcp_addr addr, union tcp_addr dest)
+{
+	if (link_set_up(veth))
+		test_error("Failed to set link up");
+	if (ip_addr_add(veth, family, addr, prefix))
+		test_error("Failed to add ip address");
+	if (ip_route_add(veth, family, addr, dest))
+		test_error("Failed to add route");
+}
+
+static unsigned int nr_threads = 1;
+
+static pthread_mutex_t sync_lock = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t sync_cond = PTHREAD_COND_INITIALIZER;
+static volatile unsigned int stage_threads[2];
+static volatile unsigned int stage_nr;
+
+/* synchronize all threads in the same stage */
+void synchronize_threads(void)
+{
+	unsigned int q = stage_nr;
+
+	pthread_mutex_lock(&sync_lock);
+	stage_threads[q]++;
+	if (stage_threads[q] == nr_threads) {
+		stage_nr ^= 1;
+		stage_threads[stage_nr] = 0;
+		pthread_cond_signal(&sync_cond);
+	}
+	while (stage_threads[q] < nr_threads)
+		pthread_cond_wait(&sync_cond, &sync_lock);
+	pthread_mutex_unlock(&sync_lock);
+}
+
+__thread union tcp_addr this_ip_addr;
+__thread union tcp_addr this_ip_dest;
+int test_family;
+
+struct new_pthread_arg {
+	thread_fn	func;
+	union tcp_addr	my_ip;
+	union tcp_addr	dest_ip;
+};
+static void *new_pthread_entry(void *arg)
+{
+	struct new_pthread_arg *p = arg;
+
+	this_ip_addr = p->my_ip;
+	this_ip_dest = p->dest_ip;
+	p->func(NULL); /* shouldn't return */
+	exit(KSFT_FAIL);
+}
+
+static void __test_skip_all(const char *msg)
+{
+	ksft_set_plan(1);
+	ksft_print_header();
+	skipped = 1;
+	test_skip("%s", msg);
+	exit(KSFT_SKIP);
+}
+
+void __test_init(unsigned int ntests, int family, unsigned int prefix,
+		 union tcp_addr addr1, union tcp_addr addr2,
+		 thread_fn peer1, thread_fn peer2)
+{
+	struct sigaction sa = {
+		.sa_handler = sig_int,
+		.sa_flags = SA_RESTART,
+	};
+	time_t seed = time(NULL);
+
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(SIGINT, &sa, NULL))
+		test_error("Can't set SIGINT handler");
+
+	test_family = family;
+	if (!kernel_config_has(KCONFIG_NET_NS))
+		__test_skip_all(tests_skip_reason[KCONFIG_NET_NS]);
+	if (!kernel_config_has(KCONFIG_VETH))
+		__test_skip_all(tests_skip_reason[KCONFIG_VETH]);
+	if (!kernel_config_has(KCONFIG_TCP_AO))
+		__test_skip_all(tests_skip_reason[KCONFIG_TCP_AO]);
+
+	ksft_set_plan(ntests);
+	test_print("rand seed %u", (unsigned int)seed);
+	srand(seed);
+
+
+	ksft_print_header();
+	init_namespaces();
+
+	if (add_veth(veth_name, nsfd_parent, nsfd_child))
+		test_error("Failed to add veth");
+
+	switch_ns(nsfd_child);
+	link_init(veth_name, family, prefix, addr2, addr1);
+	if (peer2) {
+		struct new_pthread_arg targ;
+		pthread_t t;
+
+		targ.my_ip = addr2;
+		targ.dest_ip = addr1;
+		targ.func = peer2;
+		nr_threads++;
+		if (pthread_create(&t, NULL, new_pthread_entry, &targ))
+			test_error("Failed to create pthread");
+	}
+	switch_ns(nsfd_parent);
+	link_init(veth_name, family, prefix, addr1, addr2);
+
+	this_ip_addr = addr1;
+	this_ip_dest = addr2;
+	peer1(NULL);
+	if (failed)
+		exit(KSFT_FAIL);
+	else
+		exit(KSFT_PASS);
+}
+
+/* /proc/sys/net/core/optmem_max artifically limits the amount of memory
+ * that can be allocated with sock_kmalloc() on each socket in the system.
+ * It is not virtualized in v6.7, so it has to written outside test
+ * namespaces. To be nice a test will revert optmem back to the old value.
+ * Keeping it simple without any file lock, which means the tests that
+ * need to set/increase optmem value shouldn't run in parallel.
+ * Also, not re-entrant.
+ * Since commit f5769faeec36 ("net: Namespace-ify sysctl_optmem_max")
+ * it is per-namespace, keeping logic for non-virtualized optmem_max
+ * for v6.7, which supports TCP-AO.
+ */
+static const char *optmem_file = "/proc/sys/net/core/optmem_max";
+static size_t saved_optmem;
+static int optmem_ns = -1;
+
+static bool is_optmem_namespaced(void)
+{
+	if (optmem_ns == -1) {
+		int old_ns = switch_save_ns(nsfd_child);
+
+		optmem_ns = !access(optmem_file, F_OK);
+		switch_ns(old_ns);
+	}
+	return !!optmem_ns;
+}
+
+size_t test_get_optmem(void)
+{
+	int old_ns = 0;
+	FILE *foptmem;
+	size_t ret;
+
+	if (!is_optmem_namespaced())
+		old_ns = switch_save_ns(nsfd_outside);
+	foptmem = fopen(optmem_file, "r");
+	if (!foptmem)
+		test_error("failed to open %s", optmem_file);
+
+	if (fscanf(foptmem, "%zu", &ret) != 1)
+		test_error("can't read from %s", optmem_file);
+	fclose(foptmem);
+	if (!is_optmem_namespaced())
+		switch_ns(old_ns);
+	return ret;
+}
+
+static void __test_set_optmem(size_t new, size_t *old)
+{
+	int old_ns = 0;
+	FILE *foptmem;
+
+	if (old != NULL)
+		*old = test_get_optmem();
+
+	if (!is_optmem_namespaced())
+		old_ns = switch_save_ns(nsfd_outside);
+	foptmem = fopen(optmem_file, "w");
+	if (!foptmem)
+		test_error("failed to open %s", optmem_file);
+
+	if (fprintf(foptmem, "%zu", new) <= 0)
+		test_error("can't write %zu to %s", new, optmem_file);
+	fclose(foptmem);
+	if (!is_optmem_namespaced())
+		switch_ns(old_ns);
+}
+
+static void test_revert_optmem(void)
+{
+	if (saved_optmem == 0)
+		return;
+
+	__test_set_optmem(saved_optmem, NULL);
+}
+
+void test_set_optmem(size_t value)
+{
+	if (saved_optmem == 0) {
+		__test_set_optmem(value, &saved_optmem);
+		test_add_destructor(test_revert_optmem);
+	} else {
+		__test_set_optmem(value, NULL);
+	}
+}
diff --git a/tools/testing/selftests/net/tcp_ao/lib/sock.c b/tools/testing/selftests/net/tcp_ao/lib/sock.c
new file mode 100644
index 000000000000..15aeb0963058
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/lib/sock.c
@@ -0,0 +1,596 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <alloca.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <string.h>
+#include "../../../../../include/linux/kernel.h"
+#include "../../../../../include/linux/stringify.h"
+#include "aolib.h"
+
+const unsigned int test_server_port = 7010;
+int __test_listen_socket(int backlog, void *addr, size_t addr_sz)
+{
+	int err, sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+	long flags;
+
+	if (sk < 0)
+		test_error("socket()");
+
+	err = setsockopt(sk, SOL_SOCKET, SO_BINDTODEVICE, veth_name,
+			 strlen(veth_name) + 1);
+	if (err < 0)
+		test_error("setsockopt(SO_BINDTODEVICE)");
+
+	if (bind(sk, (struct sockaddr *)addr, addr_sz) < 0)
+		test_error("bind()");
+
+	flags = fcntl(sk, F_GETFL);
+	if ((flags < 0) || (fcntl(sk, F_SETFL, flags | O_NONBLOCK) < 0))
+		test_error("fcntl()");
+
+	if (listen(sk, backlog))
+		test_error("listen()");
+
+	return sk;
+}
+
+int test_wait_fd(int sk, time_t sec, bool write)
+{
+	struct timeval tv = { .tv_sec = sec };
+	struct timeval *ptv = NULL;
+	fd_set fds, efds;
+	int ret;
+	socklen_t slen = sizeof(ret);
+
+	FD_ZERO(&fds);
+	FD_SET(sk, &fds);
+	FD_ZERO(&efds);
+	FD_SET(sk, &efds);
+
+	if (sec)
+		ptv = &tv;
+
+	errno = 0;
+	if (write)
+		ret = select(sk + 1, NULL, &fds, &efds, ptv);
+	else
+		ret = select(sk + 1, &fds, NULL, &efds, ptv);
+	if (ret < 0)
+		return -errno;
+	if (ret == 0) {
+		errno = ETIMEDOUT;
+		return -ETIMEDOUT;
+	}
+
+	if (getsockopt(sk, SOL_SOCKET, SO_ERROR, &ret, &slen))
+		return -errno;
+	if (ret)
+		return -ret;
+	return 0;
+}
+
+int __test_connect_socket(int sk, const char *device,
+			  void *addr, size_t addr_sz, time_t timeout)
+{
+	long flags;
+	int err;
+
+	if (device != NULL) {
+		err = setsockopt(sk, SOL_SOCKET, SO_BINDTODEVICE, device,
+				 strlen(device) + 1);
+		if (err < 0)
+			test_error("setsockopt(SO_BINDTODEVICE, %s)", device);
+	}
+
+	if (!timeout) {
+		err = connect(sk, addr, addr_sz);
+		if (err) {
+			err = -errno;
+			goto out;
+		}
+		return 0;
+	}
+
+	flags = fcntl(sk, F_GETFL);
+	if ((flags < 0) || (fcntl(sk, F_SETFL, flags | O_NONBLOCK) < 0))
+		test_error("fcntl()");
+
+	if (connect(sk, addr, addr_sz) < 0) {
+		if (errno != EINPROGRESS) {
+			err = -errno;
+			goto out;
+		}
+		if (timeout < 0)
+			return sk;
+		err = test_wait_fd(sk, timeout, 1);
+		if (err)
+			goto out;
+	}
+	return sk;
+
+out:
+	close(sk);
+	return err;
+}
+
+int __test_set_md5(int sk, void *addr, size_t addr_sz, uint8_t prefix,
+		   int vrf, const char *password)
+{
+	size_t pwd_len = strlen(password);
+	struct tcp_md5sig md5sig = {};
+
+	md5sig.tcpm_keylen = pwd_len;
+	memcpy(md5sig.tcpm_key, password, pwd_len);
+	md5sig.tcpm_flags = TCP_MD5SIG_FLAG_PREFIX;
+	md5sig.tcpm_prefixlen = prefix;
+	if (vrf >= 0) {
+		md5sig.tcpm_flags |= TCP_MD5SIG_FLAG_IFINDEX;
+		md5sig.tcpm_ifindex = (uint8_t)vrf;
+	}
+	memcpy(&md5sig.tcpm_addr, addr, addr_sz);
+
+	errno = 0;
+	return setsockopt(sk, IPPROTO_TCP, TCP_MD5SIG_EXT,
+			&md5sig, sizeof(md5sig));
+}
+
+
+int test_prepare_key_sockaddr(struct tcp_ao_add *ao, const char *alg,
+		void *addr, size_t addr_sz, bool set_current, bool set_rnext,
+		uint8_t prefix, uint8_t vrf, uint8_t sndid, uint8_t rcvid,
+		uint8_t maclen, uint8_t keyflags,
+		uint8_t keylen, const char *key)
+{
+	memset(ao, 0, sizeof(struct tcp_ao_add));
+
+	ao->set_current	= !!set_current;
+	ao->set_rnext	= !!set_rnext;
+	ao->prefix	= prefix;
+	ao->sndid	= sndid;
+	ao->rcvid	= rcvid;
+	ao->maclen	= maclen;
+	ao->keyflags	= keyflags;
+	ao->keylen	= keylen;
+	ao->ifindex	= vrf;
+
+	memcpy(&ao->addr, addr, addr_sz);
+
+	if (strlen(alg) > 64)
+		return -ENOBUFS;
+	strncpy(ao->alg_name, alg, 64);
+
+	memcpy(ao->key, key,
+	       (keylen > TCP_AO_MAXKEYLEN) ? TCP_AO_MAXKEYLEN : keylen);
+	return 0;
+}
+
+static int test_get_ao_keys_nr(int sk)
+{
+	struct tcp_ao_getsockopt tmp = {};
+	socklen_t tmp_sz = sizeof(tmp);
+	int ret;
+
+	tmp.nkeys  = 1;
+	tmp.get_all = 1;
+
+	ret = getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS, &tmp, &tmp_sz);
+	if (ret)
+		return -errno;
+	return (int)tmp.nkeys;
+}
+
+int test_get_one_ao(int sk, struct tcp_ao_getsockopt *out,
+		void *addr, size_t addr_sz, uint8_t prefix,
+		uint8_t sndid, uint8_t rcvid)
+{
+	struct tcp_ao_getsockopt tmp = {};
+	socklen_t tmp_sz = sizeof(tmp);
+	int ret;
+
+	memcpy(&tmp.addr, addr, addr_sz);
+	tmp.prefix = prefix;
+	tmp.sndid  = sndid;
+	tmp.rcvid  = rcvid;
+	tmp.nkeys  = 1;
+
+	ret = getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS, &tmp, &tmp_sz);
+	if (ret)
+		return ret;
+	if (tmp.nkeys != 1)
+		return -E2BIG;
+	*out = tmp;
+	return 0;
+}
+
+int test_get_ao_info(int sk, struct tcp_ao_info_opt *out)
+{
+	socklen_t sz = sizeof(*out);
+
+	out->reserved = 0;
+	out->reserved2 = 0;
+	if (getsockopt(sk, IPPROTO_TCP, TCP_AO_INFO, out, &sz))
+		return -errno;
+	if (sz != sizeof(*out))
+		return -EMSGSIZE;
+	return 0;
+}
+
+int test_set_ao_info(int sk, struct tcp_ao_info_opt *in)
+{
+	socklen_t sz = sizeof(*in);
+
+	in->reserved = 0;
+	in->reserved2 = 0;
+	if (setsockopt(sk, IPPROTO_TCP, TCP_AO_INFO, in, sz))
+		return -errno;
+	return 0;
+}
+
+int test_cmp_getsockopt_setsockopt(const struct tcp_ao_add *a,
+				   const struct tcp_ao_getsockopt *b)
+{
+	bool is_kdf_aes_128_cmac = false;
+	bool is_cmac_aes = false;
+
+	if (!strcmp("cmac(aes128)", a->alg_name)) {
+		is_kdf_aes_128_cmac = (a->keylen != 16);
+		is_cmac_aes = true;
+	}
+
+#define __cmp_ao(member)						\
+do {									\
+	if (b->member != a->member) {					\
+		test_fail("getsockopt(): " __stringify(member) " %u != %u",	\
+				b->member, a->member);			\
+		return -1;						\
+	}								\
+} while(0)
+	__cmp_ao(sndid);
+	__cmp_ao(rcvid);
+	__cmp_ao(prefix);
+	__cmp_ao(keyflags);
+	__cmp_ao(ifindex);
+	if (a->maclen) {
+		__cmp_ao(maclen);
+	} else if (b->maclen != 12) {
+		test_fail("getsockopt(): expected default maclen 12, but it's %u",
+				b->maclen);
+		return -1;
+	}
+	if (!is_kdf_aes_128_cmac) {
+		__cmp_ao(keylen);
+	} else if (b->keylen != 16) {
+		test_fail("getsockopt(): expected keylen 16 for cmac(aes128), but it's %u",
+				b->keylen);
+		return -1;
+	}
+#undef __cmp_ao
+	if (!is_kdf_aes_128_cmac && memcmp(b->key, a->key, a->keylen)) {
+		test_fail("getsockopt(): returned key is different `%s' != `%s'",
+				b->key, a->key);
+		return -1;
+	}
+	if (memcmp(&b->addr, &a->addr, sizeof(b->addr))) {
+		test_fail("getsockopt(): returned address is different");
+		return -1;
+	}
+	if (!is_cmac_aes && strcmp(b->alg_name, a->alg_name)) {
+		test_fail("getsockopt(): returned algorithm %s is different than %s", b->alg_name, a->alg_name);
+		return -1;
+	}
+	if (is_cmac_aes && strcmp(b->alg_name, "cmac(aes)")) {
+		test_fail("getsockopt(): returned algorithm %s is different than cmac(aes)", b->alg_name);
+		return -1;
+	}
+	/* For a established key rotation test don't add a key with
+	 * set_current = 1, as it's likely to change by peer's request;
+	 * rather use setsockopt(TCP_AO_INFO)
+	 */
+	if (a->set_current != b->is_current) {
+		test_fail("getsockopt(): returned key is not Current_key");
+		return -1;
+	}
+	if (a->set_rnext != b->is_rnext) {
+		test_fail("getsockopt(): returned key is not RNext_key");
+		return -1;
+	}
+
+	return 0;
+}
+
+int test_cmp_getsockopt_setsockopt_ao(const struct tcp_ao_info_opt *a,
+				      const struct tcp_ao_info_opt *b)
+{
+	/* No check for ::current_key, as it may change by the peer */
+	if (a->ao_required != b->ao_required) {
+		test_fail("getsockopt(): returned ao doesn't have ao_required");
+		return -1;
+	}
+	if (a->accept_icmps != b->accept_icmps) {
+		test_fail("getsockopt(): returned ao doesn't accept ICMPs");
+		return -1;
+	}
+	if (a->set_rnext && a->rnext != b->rnext) {
+		test_fail("getsockopt(): RNext KeyID has changed");
+		return -1;
+	}
+#define __cmp_cnt(member)						\
+do {									\
+	if (b->member != a->member) {					\
+		test_fail("getsockopt(): " __stringify(member) " %llu != %llu",	\
+				b->member, a->member);			\
+		return -1;						\
+	}								\
+} while(0)
+	if (a->set_counters) {
+		__cmp_cnt(pkt_good);
+		__cmp_cnt(pkt_bad);
+		__cmp_cnt(pkt_key_not_found);
+		__cmp_cnt(pkt_ao_required);
+		__cmp_cnt(pkt_dropped_icmp);
+	}
+#undef __cmp_cnt
+	return 0;
+}
+
+int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out)
+{
+	struct tcp_ao_getsockopt *key_dump;
+	socklen_t key_dump_sz = sizeof(*key_dump);
+	struct tcp_ao_info_opt info = {};
+	bool c1, c2, c3, c4, c5;
+	struct netstat *ns;
+	int err, nr_keys;
+
+	memset(out, 0, sizeof(*out));
+
+	/* per-netns */
+	ns = netstat_read();
+	out->netns_ao_good = netstat_get(ns, "TCPAOGood", &c1);
+	out->netns_ao_bad = netstat_get(ns, "TCPAOBad", &c2);
+	out->netns_ao_key_not_found = netstat_get(ns, "TCPAOKeyNotFound", &c3);
+	out->netns_ao_required = netstat_get(ns, "TCPAORequired", &c4);
+	out->netns_ao_dropped_icmp = netstat_get(ns, "TCPAODroppedIcmps", &c5);
+	netstat_free(ns);
+	if (c1 || c2 || c3 || c4 || c5)
+		return -EOPNOTSUPP;
+
+	err = test_get_ao_info(sk, &info);
+	if (err)
+		return err;
+
+	/* per-socket */
+	out->ao_info_pkt_good		= info.pkt_good;
+	out->ao_info_pkt_bad		= info.pkt_bad;
+	out->ao_info_pkt_key_not_found	= info.pkt_key_not_found;
+	out->ao_info_pkt_ao_required	= info.pkt_ao_required;
+	out->ao_info_pkt_dropped_icmp	= info.pkt_dropped_icmp;
+
+	/* per-key */
+	nr_keys = test_get_ao_keys_nr(sk);
+	if (nr_keys < 0)
+		return nr_keys;
+	if (nr_keys == 0)
+		test_error("test_get_ao_keys_nr() == 0");
+	out->nr_keys = (size_t)nr_keys;
+	key_dump = calloc(nr_keys, key_dump_sz);
+	if (!key_dump)
+		return -errno;
+
+	key_dump[0].nkeys = nr_keys;
+	key_dump[0].get_all = 1;
+	key_dump[0].get_all = 1;
+	err = getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS,
+			 key_dump, &key_dump_sz);
+	if (err) {
+		free(key_dump);
+		return -errno;
+	}
+
+	out->key_cnts = calloc(nr_keys, sizeof(out->key_cnts[0]));
+	if (!out->key_cnts) {
+		free(key_dump);
+		return -errno;
+	}
+
+	while (nr_keys--) {
+		out->key_cnts[nr_keys].sndid = key_dump[nr_keys].sndid;
+		out->key_cnts[nr_keys].rcvid = key_dump[nr_keys].rcvid;
+		out->key_cnts[nr_keys].pkt_good = key_dump[nr_keys].pkt_good;
+		out->key_cnts[nr_keys].pkt_bad = key_dump[nr_keys].pkt_bad;
+	}
+	free(key_dump);
+
+	return 0;
+}
+
+int __test_tcp_ao_counters_cmp(const char *tst_name,
+			       struct tcp_ao_counters *before,
+			       struct tcp_ao_counters *after,
+			       test_cnt expected)
+{
+#define __cmp_ao(cnt, expecting_inc)					\
+do {									\
+	if (before->cnt > after->cnt) {					\
+		test_fail("%s: Decreased counter " __stringify(cnt) " %" PRIu64 " > %" PRIu64, \
+			  tst_name ?: "", before->cnt, after->cnt);		\
+		return -1;						\
+	}								\
+	if ((before->cnt != after->cnt) != (expecting_inc)) {		\
+		test_fail("%s: Counter " __stringify(cnt) " was %sexpected to increase %" PRIu64 " => %" PRIu64, \
+			  tst_name ?: "", (expecting_inc) ? "" : "not ",	\
+			  before->cnt, after->cnt);			\
+		return -1;						\
+	}								\
+} while(0)
+
+	errno = 0;
+	/* per-netns */
+	__cmp_ao(netns_ao_good, !!(expected & TEST_CNT_NS_GOOD));
+	__cmp_ao(netns_ao_bad, !!(expected & TEST_CNT_NS_BAD));
+	__cmp_ao(netns_ao_key_not_found,
+		 !!(expected & TEST_CNT_NS_KEY_NOT_FOUND));
+	__cmp_ao(netns_ao_required, !!(expected & TEST_CNT_NS_AO_REQUIRED));
+	__cmp_ao(netns_ao_dropped_icmp,
+		 !!(expected & TEST_CNT_NS_DROPPED_ICMP));
+	/* per-socket */
+	__cmp_ao(ao_info_pkt_good, !!(expected & TEST_CNT_SOCK_GOOD));
+	__cmp_ao(ao_info_pkt_bad, !!(expected & TEST_CNT_SOCK_BAD));
+	__cmp_ao(ao_info_pkt_key_not_found,
+		 !!(expected & TEST_CNT_SOCK_KEY_NOT_FOUND));
+	__cmp_ao(ao_info_pkt_ao_required, !!(expected & TEST_CNT_SOCK_AO_REQUIRED));
+	__cmp_ao(ao_info_pkt_dropped_icmp,
+		 !!(expected & TEST_CNT_SOCK_DROPPED_ICMP));
+	return 0;
+#undef __cmp_ao
+}
+
+int test_tcp_ao_key_counters_cmp(const char *tst_name,
+				 struct tcp_ao_counters *before,
+				 struct tcp_ao_counters *after,
+				 test_cnt expected,
+				 int sndid, int rcvid)
+{
+	size_t i;
+#define __cmp_ao(i, cnt, expecting_inc)					\
+do {									\
+	if (before->key_cnts[i].cnt > after->key_cnts[i].cnt) {		\
+		test_fail("%s: Decreased counter " __stringify(cnt) " %" PRIu64 " > %" PRIu64 " for key %u:%u", \
+			  tst_name ?: "", before->key_cnts[i].cnt,	\
+			  after->key_cnts[i].cnt,			\
+			  before->key_cnts[i].sndid,			\
+			  before->key_cnts[i].rcvid);			\
+		return -1;						\
+	}								\
+	if ((before->key_cnts[i].cnt != after->key_cnts[i].cnt) != (expecting_inc)) {		\
+		test_fail("%s: Counter " __stringify(cnt) " was %sexpected to increase %" PRIu64 " => %" PRIu64 " for key %u:%u", \
+			  tst_name ?: "", (expecting_inc) ? "" : "not ",\
+			  before->key_cnts[i].cnt,			\
+			  after->key_cnts[i].cnt,			\
+			  before->key_cnts[i].sndid,			\
+			  before->key_cnts[i].rcvid);			\
+		return -1;						\
+	}								\
+} while(0)
+
+	if (before->nr_keys != after->nr_keys) {
+		test_fail("%s: Keys changed on the socket %zu != %zu",
+			  tst_name, before->nr_keys, after->nr_keys);
+		return -1;
+	}
+
+	/* per-key */
+	i = before->nr_keys;
+	while (i--) {
+		if (sndid >= 0 && before->key_cnts[i].sndid != sndid)
+			continue;
+		if (rcvid >= 0 && before->key_cnts[i].rcvid != rcvid)
+			continue;
+		__cmp_ao(i, pkt_good, !!(expected & TEST_CNT_KEY_GOOD));
+		__cmp_ao(i, pkt_bad, !!(expected & TEST_CNT_KEY_BAD));
+	}
+	return 0;
+#undef __cmp_ao
+}
+
+void test_tcp_ao_counters_free(struct tcp_ao_counters *cnts)
+{
+	free(cnts->key_cnts);
+}
+
+#define TEST_BUF_SIZE 4096
+ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec)
+{
+	ssize_t total = 0;
+
+	do {
+		char buf[TEST_BUF_SIZE];
+		ssize_t bytes, sent;
+		int ret;
+
+		ret = test_wait_fd(sk, timeout_sec, 0);
+		if (ret)
+			return ret;
+
+		bytes = recv(sk, buf, sizeof(buf), 0);
+
+		if (bytes < 0)
+			test_error("recv(): %zd", bytes);
+		if (bytes == 0)
+			break;
+
+		ret = test_wait_fd(sk, timeout_sec, 1);
+		if (ret)
+			return ret;
+
+		sent = send(sk, buf, bytes, 0);
+		if (sent == 0)
+			break;
+		if (sent != bytes)
+			test_error("send()");
+		total += bytes;
+	} while (!quota || total < quota);
+
+	return total;
+}
+
+ssize_t test_client_loop(int sk, char *buf, size_t buf_sz,
+			 const size_t msg_len, time_t timeout_sec)
+{
+	char msg[msg_len];
+	int nodelay = 1;
+	size_t i;
+
+	if (setsockopt(sk, IPPROTO_TCP, TCP_NODELAY, &nodelay, sizeof(nodelay)))
+		test_error("setsockopt(TCP_NODELAY)");
+
+	for (i = 0; i < buf_sz; i += min(msg_len, buf_sz - i)) {
+		size_t sent, bytes = min(msg_len, buf_sz - i);
+		int ret;
+
+		ret = test_wait_fd(sk, timeout_sec, 1);
+		if (ret)
+			return ret;
+
+		sent = send(sk, buf + i, bytes, 0);
+		if (sent == 0)
+			break;
+		if (sent != bytes)
+			test_error("send()");
+
+		bytes = 0;
+		do {
+			ssize_t got;
+
+			ret = test_wait_fd(sk, timeout_sec, 0);
+			if (ret)
+				return ret;
+
+			got = recv(sk, msg + bytes, sizeof(msg) - bytes, 0);
+			if (got <= 0)
+				return i;
+			bytes += got;
+		} while (bytes < sent);
+		if (bytes > sent)
+			test_error("recv(): %zd > %zd", bytes, sent);
+		if (memcmp(buf + i, msg, bytes) != 0) {
+			test_fail("received message differs");
+			return -1;
+		}
+	}
+	return i;
+}
+
+int test_client_verify(int sk, const size_t msg_len, const size_t nr,
+		       time_t timeout_sec)
+{
+	size_t buf_sz = msg_len * nr;
+	char *buf = alloca(buf_sz);
+	ssize_t ret;
+
+	randomize_buffer(buf, buf_sz);
+	ret = test_client_loop(sk, buf, buf_sz, msg_len, timeout_sec);
+	if (ret < 0)
+		return (int)ret;
+	return ret != buf_sz ? -1 : 0;
+}
diff --git a/tools/testing/selftests/net/tcp_ao/lib/utils.c b/tools/testing/selftests/net/tcp_ao/lib/utils.c
new file mode 100644
index 000000000000..372daca525f5
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/lib/utils.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "aolib.h"
+#include <string.h>
+
+void randomize_buffer(void *buf, size_t buflen)
+{
+	int *p = (int *)buf;
+	size_t words = buflen / sizeof(int);
+	size_t leftover = buflen % sizeof(int);
+
+	if (!buflen)
+		return;
+
+	while (words--)
+		*p++ = rand();
+
+	if (leftover) {
+		int tmp = rand();
+
+		memcpy(buf + buflen - leftover, &tmp, leftover);
+	}
+}
+
+const struct sockaddr_in6 addr_any6 = {
+	.sin6_family	= AF_INET6,
+};
+
+const struct sockaddr_in addr_any4 = {
+	.sin_family	= AF_INET,
+};
diff --git a/tools/testing/selftests/net/tcp_ao/restore.c b/tools/testing/selftests/net/tcp_ao/restore.c
new file mode 100644
index 000000000000..8fdc808df325
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/restore.c
@@ -0,0 +1,236 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Author: Dmitry Safonov <dima@arista.com> */
+/* This is over-simplified TCP_REPAIR for TCP_ESTABLISHED sockets
+ * It tests that TCP-AO enabled connection can be restored.
+ * For the proper socket repair see:
+ * https://github.com/checkpoint-restore/criu/blob/criu-dev/soccr/soccr.h
+ */
+#include <inttypes.h>
+#include "aolib.h"
+
+const size_t nr_packets = 20;
+const size_t msg_len = 100;
+const size_t quota = nr_packets * msg_len;
+#define fault(type)	(inj == FAULT_ ## type)
+
+static void try_server_run(const char *tst_name, unsigned int port,
+			   fault_t inj, test_cnt cnt_expected)
+{
+	const char *cnt_name = "TCPAOGood";
+	struct tcp_ao_counters ao1, ao2;
+	uint64_t before_cnt, after_cnt;
+	int sk, lsk;
+	time_t timeout;
+	ssize_t bytes;
+
+	if (fault(TIMEOUT))
+		cnt_name = "TCPAOBad";
+	lsk = test_listen_socket(this_ip_addr, port, 1);
+
+	if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100))
+		test_error("setsockopt(TCP_AO_ADD_KEY)");
+	synchronize_threads(); /* 1: MKT added => connect() */
+
+	if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0))
+		test_error("test_wait_fd()");
+
+	sk = accept(lsk, NULL, NULL);
+	if (sk < 0)
+		test_error("accept()");
+
+	synchronize_threads(); /* 2: accepted => send data */
+	close(lsk);
+
+	bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC);
+	if (bytes != quota) {
+		test_fail("%s: server served: %zd", tst_name, bytes);
+		goto out;
+	}
+
+	before_cnt = netstat_get_one(cnt_name, NULL);
+	if (test_get_tcp_ao_counters(sk, &ao1))
+		test_error("test_get_tcp_ao_counters()");
+
+	timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
+	bytes = test_server_run(sk, quota, timeout);
+	if (fault(TIMEOUT)) {
+		if (bytes > 0)
+			test_fail("%s: server served: %zd", tst_name, bytes);
+		else
+			test_ok("%s: server couldn't serve", tst_name);
+	} else {
+		if (bytes != quota)
+			test_fail("%s: server served: %zd", tst_name, bytes);
+		else
+			test_ok("%s: server alive", tst_name);
+	}
+	if (test_get_tcp_ao_counters(sk, &ao2))
+		test_error("test_get_tcp_ao_counters()");
+	after_cnt = netstat_get_one(cnt_name, NULL);
+
+	test_tcp_ao_counters_cmp(tst_name, &ao1, &ao2, cnt_expected);
+
+	if (after_cnt <= before_cnt) {
+		test_fail("%s: %s counter did not increase: %zu <= %zu",
+				tst_name, cnt_name, after_cnt, before_cnt);
+	} else {
+		test_ok("%s: counter %s increased %zu => %zu",
+			tst_name, cnt_name, before_cnt, after_cnt);
+	}
+
+	/*
+	 * Before close() as that will send FIN and move the peer in TCP_CLOSE
+	 * and that will prevent reading AO counters from the peer's socket.
+	 */
+	synchronize_threads(); /* 3: verified => closed */
+out:
+	close(sk);
+}
+
+static void *server_fn(void *arg)
+{
+	unsigned int port = test_server_port;
+
+	try_server_run("TCP-AO migrate to another socket", port++,
+		       0, TEST_CNT_GOOD);
+	try_server_run("TCP-AO with wrong send ISN", port++,
+		       FAULT_TIMEOUT, TEST_CNT_BAD);
+	try_server_run("TCP-AO with wrong receive ISN", port++,
+		       FAULT_TIMEOUT, TEST_CNT_BAD);
+	try_server_run("TCP-AO with wrong send SEQ ext number", port++,
+		       FAULT_TIMEOUT, TEST_CNT_BAD);
+	try_server_run("TCP-AO with wrong receive SEQ ext number", port++,
+		       FAULT_TIMEOUT, TEST_CNT_NS_BAD | TEST_CNT_GOOD);
+
+	synchronize_threads(); /* don't race to exit: client exits */
+	return NULL;
+}
+
+static void test_get_sk_checkpoint(unsigned int server_port, sockaddr_af *saddr,
+				   struct tcp_sock_state *img,
+				   struct tcp_ao_repair *ao_img)
+{
+	int sk;
+
+	sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+	if (sk < 0)
+		test_error("socket()");
+
+	if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100))
+		test_error("setsockopt(TCP_AO_ADD_KEY)");
+
+	synchronize_threads(); /* 1: MKT added => connect() */
+	if (test_connect_socket(sk, this_ip_dest, server_port) <= 0)
+		test_error("failed to connect()");
+
+	synchronize_threads(); /* 2: accepted => send data */
+	if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC))
+		test_fail("pre-migrate verify failed");
+
+	test_enable_repair(sk);
+	test_sock_checkpoint(sk, img, saddr);
+	test_ao_checkpoint(sk, ao_img);
+	test_kill_sk(sk);
+}
+
+static void test_sk_restore(const char *tst_name, unsigned int server_port,
+			    sockaddr_af *saddr, struct tcp_sock_state *img,
+			    struct tcp_ao_repair *ao_img,
+			    fault_t inj, test_cnt cnt_expected)
+{
+	const char *cnt_name = "TCPAOGood";
+	struct tcp_ao_counters ao1, ao2;
+	uint64_t before_cnt, after_cnt;
+	time_t timeout;
+	int sk;
+
+	if (fault(TIMEOUT))
+		cnt_name = "TCPAOBad";
+
+	before_cnt = netstat_get_one(cnt_name, NULL);
+	sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+	if (sk < 0)
+		test_error("socket()");
+
+	test_enable_repair(sk);
+	test_sock_restore(sk, img, saddr, this_ip_dest, server_port);
+	if (test_add_repaired_key(sk, DEFAULT_TEST_PASSWORD, 0, this_ip_dest, -1, 100, 100))
+		test_error("setsockopt(TCP_AO_ADD_KEY)");
+	test_ao_restore(sk, ao_img);
+
+	if (test_get_tcp_ao_counters(sk, &ao1))
+		test_error("test_get_tcp_ao_counters()");
+
+	test_disable_repair(sk);
+	test_sock_state_free(img);
+
+	timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
+	if (test_client_verify(sk, msg_len, nr_packets, timeout)) {
+		if (fault(TIMEOUT))
+			test_ok("%s: post-migrate connection is broken", tst_name);
+		else
+			test_fail("%s: post-migrate connection is working", tst_name);
+	} else {
+		if (fault(TIMEOUT))
+			test_fail("%s: post-migrate connection still working", tst_name);
+		else
+			test_ok("%s: post-migrate connection is alive", tst_name);
+	}
+	if (test_get_tcp_ao_counters(sk, &ao2))
+		test_error("test_get_tcp_ao_counters()");
+	after_cnt = netstat_get_one(cnt_name, NULL);
+
+	test_tcp_ao_counters_cmp(tst_name, &ao1, &ao2, cnt_expected);
+
+	if (after_cnt <= before_cnt) {
+		test_fail("%s: %s counter did not increase: %zu <= %zu",
+				tst_name, cnt_name, after_cnt, before_cnt);
+	} else {
+		test_ok("%s: counter %s increased %zu => %zu",
+			tst_name, cnt_name, before_cnt, after_cnt);
+	}
+	synchronize_threads(); /* 3: verified => closed */
+	close(sk);
+}
+
+static void *client_fn(void *arg)
+{
+	unsigned int port = test_server_port;
+	struct tcp_sock_state tcp_img;
+	struct tcp_ao_repair ao_img;
+	sockaddr_af saddr;
+
+	test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img);
+	test_sk_restore("TCP-AO migrate to another socket", port++,
+			&saddr, &tcp_img, &ao_img, 0, TEST_CNT_GOOD);
+
+	test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img);
+	ao_img.snt_isn += 1;
+	test_sk_restore("TCP-AO with wrong send ISN", port++,
+			&saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_BAD);
+
+	test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img);
+	ao_img.rcv_isn += 1;
+	test_sk_restore("TCP-AO with wrong receive ISN", port++,
+			&saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_BAD);
+
+	test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img);
+	ao_img.snd_sne += 1;
+	test_sk_restore("TCP-AO with wrong send SEQ ext number", port++,
+			&saddr, &tcp_img, &ao_img, FAULT_TIMEOUT,
+			TEST_CNT_NS_BAD | TEST_CNT_GOOD);
+
+	test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img);
+	ao_img.rcv_sne += 1;
+	test_sk_restore("TCP-AO with wrong receive SEQ ext number", port++,
+			&saddr, &tcp_img, &ao_img, FAULT_TIMEOUT,
+			TEST_CNT_NS_GOOD | TEST_CNT_BAD);
+
+	return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+	test_init(20, server_fn, client_fn);
+	return 0;
+}
diff --git a/tools/testing/selftests/net/tcp_ao/rst.c b/tools/testing/selftests/net/tcp_ao/rst.c
new file mode 100644
index 000000000000..7df8b8700e39
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/rst.c
@@ -0,0 +1,457 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * The test checks that both active and passive reset have correct TCP-AO
+ * signature. An "active" reset (abort) here is procured from closing
+ * listen() socket with non-accepted connections in the queue:
+ * inet_csk_listen_stop() => inet_child_forget() =>
+ *                        => tcp_disconnect() => tcp_send_active_reset()
+ *
+ * The passive reset is quite hard to get on established TCP connections.
+ * It could be procured from non-established states, but the synchronization
+ * part from userspace in order to reliably get RST seems uneasy.
+ * So, instead it's procured by corrupting SEQ number on TIMED-WAIT state.
+ *
+ * It's important to test both passive and active RST as they go through
+ * different code-paths:
+ * - tcp_send_active_reset() makes no-data skb, sends it with tcp_transmit_skb()
+ * - tcp_v*_send_reset() create their reply skbs and send them with
+ *   ip_send_unicast_reply()
+ *
+ * In both cases TCP-AO signatures have to be correct, which is verified by
+ * (1) checking that the TCP-AO connection was reset and (2) TCP-AO counters.
+ *
+ * Author: Dmitry Safonov <dima@arista.com>
+ */
+#include <inttypes.h>
+#include "../../../../include/linux/kernel.h"
+#include "aolib.h"
+
+const size_t quota = 1000;
+const size_t packet_sz = 100;
+/*
+ * Backlog == 0 means 1 connection in queue, see:
+ * commit 64a146513f8f ("[NET]: Revert incorrect accept queue...")
+ */
+const unsigned int backlog;
+
+static void netstats_check(struct netstat *before, struct netstat *after,
+			   char *msg)
+{
+	uint64_t before_cnt, after_cnt;
+
+	before_cnt = netstat_get(before, "TCPAORequired", NULL);
+	after_cnt = netstat_get(after, "TCPAORequired", NULL);
+	if (after_cnt > before_cnt)
+		test_fail("Segments without AO sign (%s): %" PRIu64 " => %" PRIu64,
+			  msg, before_cnt, after_cnt);
+	else
+		test_ok("No segments without AO sign (%s)", msg);
+
+	before_cnt = netstat_get(before, "TCPAOGood", NULL);
+	after_cnt = netstat_get(after, "TCPAOGood", NULL);
+	if (after_cnt <= before_cnt)
+		test_fail("Signed AO segments (%s): %" PRIu64 " => %" PRIu64,
+			  msg, before_cnt, after_cnt);
+	else
+		test_ok("Signed AO segments (%s): %" PRIu64 " => %" PRIu64,
+			  msg, before_cnt, after_cnt);
+
+	before_cnt = netstat_get(before, "TCPAOBad", NULL);
+	after_cnt = netstat_get(after, "TCPAOBad", NULL);
+	if (after_cnt > before_cnt)
+		test_fail("Segments with bad AO sign (%s): %" PRIu64 " => %" PRIu64,
+			  msg, before_cnt, after_cnt);
+	else
+		test_ok("No segments with bad AO sign (%s)", msg);
+}
+
+/*
+ * Another way to send RST, but not through tcp_v{4,6}_send_reset()
+ * is tcp_send_active_reset(), that is not in reply to inbound segment,
+ * but rather active send. It uses tcp_transmit_skb(), so that should
+ * work, but as it also sends RST - nice that it can be covered as well.
+ */
+static void close_forced(int sk)
+{
+	struct linger sl;
+
+	sl.l_onoff = 1;
+	sl.l_linger = 0;
+	if (setsockopt(sk, SOL_SOCKET, SO_LINGER, &sl, sizeof(sl)))
+		test_error("setsockopt(SO_LINGER)");
+	close(sk);
+}
+
+static void test_server_active_rst(unsigned int port)
+{
+	struct tcp_ao_counters cnt1, cnt2;
+	ssize_t bytes;
+	int sk, lsk;
+
+	lsk = test_listen_socket(this_ip_addr, port, backlog);
+	if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100))
+		test_error("setsockopt(TCP_AO_ADD_KEY)");
+	if (test_get_tcp_ao_counters(lsk, &cnt1))
+		test_error("test_get_tcp_ao_counters()");
+
+	synchronize_threads(); /* 1: MKT added */
+	if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0))
+		test_error("test_wait_fd()");
+
+	sk = accept(lsk, NULL, NULL);
+	if (sk < 0)
+		test_error("accept()");
+
+	synchronize_threads(); /* 2: connection accept()ed, another queued */
+	if (test_get_tcp_ao_counters(lsk, &cnt2))
+		test_error("test_get_tcp_ao_counters()");
+
+	synchronize_threads(); /* 3: close listen socket */
+	close(lsk);
+	bytes = test_server_run(sk, quota, 0);
+	if (bytes != quota)
+		test_error("servered only %zd bytes", bytes);
+	else
+		test_ok("servered %zd bytes", bytes);
+
+	synchronize_threads(); /* 4: finishing up */
+	close_forced(sk);
+
+	synchronize_threads(); /* 5: closed active sk */
+
+	synchronize_threads(); /* 6: counters checks */
+	if (test_tcp_ao_counters_cmp("active RST server", &cnt1, &cnt2, TEST_CNT_GOOD))
+		test_fail("MKT counters (server) have not only good packets");
+	else
+		test_ok("MKT counters are good on server");
+}
+
+static void test_server_passive_rst(unsigned int port)
+{
+	struct tcp_ao_counters ao1, ao2;
+	int sk, lsk;
+	ssize_t bytes;
+
+	lsk = test_listen_socket(this_ip_addr, port, 1);
+
+	if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100))
+		test_error("setsockopt(TCP_AO_ADD_KEY)");
+
+	synchronize_threads(); /* 1: MKT added => connect() */
+	if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0))
+		test_error("test_wait_fd()");
+
+	sk = accept(lsk, NULL, NULL);
+	if (sk < 0)
+		test_error("accept()");
+
+	synchronize_threads(); /* 2: accepted => send data */
+	close(lsk);
+	if (test_get_tcp_ao_counters(sk, &ao1))
+		test_error("test_get_tcp_ao_counters()");
+
+	bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC);
+	if (bytes != quota) {
+		if (bytes > 0)
+			test_fail("server served: %zd", bytes);
+		else
+			test_fail("server returned %zd", bytes);
+	}
+
+	synchronize_threads(); /* 3: checkpoint the client */
+	synchronize_threads(); /* 4: close the server, creating twsk */
+	if (test_get_tcp_ao_counters(sk, &ao2))
+		test_error("test_get_tcp_ao_counters()");
+	close(sk);
+
+	synchronize_threads(); /* 5: restore the socket, send more data */
+	test_tcp_ao_counters_cmp("passive RST server", &ao1, &ao2, TEST_CNT_GOOD);
+
+	synchronize_threads(); /* 6: server exits */
+}
+
+static void *server_fn(void *arg)
+{
+	struct netstat *ns_before, *ns_after;
+	unsigned int port = test_server_port;
+
+	ns_before = netstat_read();
+
+	test_server_active_rst(port++);
+	test_server_passive_rst(port++);
+
+	ns_after = netstat_read();
+	netstats_check(ns_before, ns_after, "server");
+	netstat_free(ns_after);
+	netstat_free(ns_before);
+	synchronize_threads(); /* exit */
+
+	synchronize_threads(); /* don't race to exit() - client exits */
+	return NULL;
+}
+
+static int test_wait_fds(int sk[], size_t nr, bool is_writable[],
+			 ssize_t wait_for, time_t sec)
+{
+	struct timeval tv = { .tv_sec = sec };
+	struct timeval *ptv = NULL;
+	fd_set left;
+	size_t i;
+	int ret;
+
+	FD_ZERO(&left);
+	for (i = 0; i < nr; i++) {
+		FD_SET(sk[i], &left);
+		if (is_writable)
+			is_writable[i] = false;
+	}
+
+	if (sec)
+		ptv = &tv;
+
+	do {
+		bool is_empty = true;
+		fd_set fds, efds;
+		int nfd = 0;
+
+		FD_ZERO(&fds);
+		FD_ZERO(&efds);
+		for (i = 0; i < nr; i++) {
+			if (!FD_ISSET(sk[i], &left))
+				continue;
+
+			if (sk[i] > nfd)
+				nfd = sk[i];
+
+			FD_SET(sk[i], &fds);
+			FD_SET(sk[i], &efds);
+			is_empty = false;
+		}
+		if (is_empty)
+			return -ENOENT;
+
+		errno = 0;
+		ret = select(nfd + 1, NULL, &fds, &efds, ptv);
+		if (ret < 0)
+			return -errno;
+		if (!ret)
+			return -ETIMEDOUT;
+		for (i = 0; i < nr; i++) {
+			if (FD_ISSET(sk[i], &fds)) {
+				if (is_writable)
+					is_writable[i] = true;
+				FD_CLR(sk[i], &left);
+				wait_for--;
+				continue;
+			}
+			if (FD_ISSET(sk[i], &efds)) {
+				FD_CLR(sk[i], &left);
+				wait_for--;
+			}
+		}
+	} while (wait_for > 0);
+
+	return 0;
+}
+
+static void test_client_active_rst(unsigned int port)
+{
+	/* one in queue, another accept()ed */
+	unsigned int wait_for = backlog + 2;
+	int i, sk[3], err;
+	bool is_writable[ARRAY_SIZE(sk)] = {false};
+	unsigned int last = ARRAY_SIZE(sk) - 1;
+
+	for (i = 0; i < ARRAY_SIZE(sk); i++) {
+		sk[i] = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+		if (sk[i] < 0)
+			test_error("socket()");
+		if (test_add_key(sk[i], DEFAULT_TEST_PASSWORD,
+				 this_ip_dest, -1, 100, 100))
+			test_error("setsockopt(TCP_AO_ADD_KEY)");
+	}
+
+	synchronize_threads(); /* 1: MKT added */
+	for (i = 0; i < last; i++) {
+		err = _test_connect_socket(sk[i], this_ip_dest, port,
+					       (i == 0) ? TEST_TIMEOUT_SEC : -1);
+
+		if (err < 0)
+			test_error("failed to connect()");
+	}
+
+	synchronize_threads(); /* 2: connection accept()ed, another queued */
+	err = test_wait_fds(sk, last, is_writable, wait_for, TEST_TIMEOUT_SEC);
+	if (err < 0)
+		test_error("test_wait_fds(): %d", err);
+
+	synchronize_threads(); /* 3: close listen socket */
+	if (test_client_verify(sk[0], packet_sz, quota / packet_sz, TEST_TIMEOUT_SEC))
+		test_fail("Failed to send data on connected socket");
+	else
+		test_ok("Verified established tcp connection");
+
+	synchronize_threads(); /* 4: finishing up */
+	err = _test_connect_socket(sk[last], this_ip_dest, port, -1);
+	if (err < 0)
+		test_error("failed to connect()");
+
+	synchronize_threads(); /* 5: closed active sk */
+	err = test_wait_fds(sk, ARRAY_SIZE(sk), NULL,
+			    wait_for, TEST_TIMEOUT_SEC);
+	if (err < 0)
+		test_error("select(): %d", err);
+
+	for (i = 0; i < ARRAY_SIZE(sk); i++) {
+		socklen_t slen = sizeof(err);
+
+		if (getsockopt(sk[i], SOL_SOCKET, SO_ERROR, &err, &slen))
+			test_error("getsockopt()");
+		if (is_writable[i] && err != ECONNRESET) {
+			test_fail("sk[%d] = %d, err = %d, connection wasn't reset",
+				  i, sk[i], err);
+		} else {
+			test_ok("sk[%d] = %d%s", i, sk[i],
+				is_writable[i] ? ", connection was reset" : "");
+		}
+	}
+	synchronize_threads(); /* 6: counters checks */
+}
+
+static void test_client_passive_rst(unsigned int port)
+{
+	struct tcp_ao_counters ao1, ao2;
+	struct tcp_ao_repair ao_img;
+	struct tcp_sock_state img;
+	sockaddr_af saddr;
+	int sk, err;
+
+	sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+	if (sk < 0)
+		test_error("socket()");
+
+	if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100))
+		test_error("setsockopt(TCP_AO_ADD_KEY)");
+
+	synchronize_threads(); /* 1: MKT added => connect() */
+	if (test_connect_socket(sk, this_ip_dest, port) <= 0)
+		test_error("failed to connect()");
+
+	synchronize_threads(); /* 2: accepted => send data */
+	if (test_client_verify(sk, packet_sz, quota / packet_sz, TEST_TIMEOUT_SEC))
+		test_fail("Failed to send data on connected socket");
+	else
+		test_ok("Verified established tcp connection");
+
+	synchronize_threads(); /* 3: checkpoint the client */
+	test_enable_repair(sk);
+	test_sock_checkpoint(sk, &img, &saddr);
+	test_ao_checkpoint(sk, &ao_img);
+	test_disable_repair(sk);
+
+	synchronize_threads(); /* 4: close the server, creating twsk */
+
+	/*
+	 * The "corruption" in SEQ has to be small enough to fit into TCP
+	 * window, see tcp_timewait_state_process() for out-of-window
+	 * segments.
+	 */
+	img.out.seq += 5; /* 5 is more noticeable in tcpdump than 1 */
+
+	/*
+	 * FIXME: This is kind-of ugly and dirty, but it works.
+	 *
+	 * At this moment, the server has close'ed(sk).
+	 * The passive RST that is being targeted here is new data after
+	 * half-duplex close, see tcp_timewait_state_process() => TCP_TW_RST
+	 *
+	 * What is needed here is:
+	 * (1) wait for FIN from the server
+	 * (2) make sure that the ACK from the client went out
+	 * (3) make sure that the ACK was received and processed by the server
+	 *
+	 * Otherwise, the data that will be sent from "repaired" socket
+	 * post SEQ corruption may get to the server before it's in
+	 * TCP_FIN_WAIT2.
+	 *
+	 * (1) is easy with select()/poll()
+	 * (2) is possible by polling tcpi_state from TCP_INFO
+	 * (3) is quite complex: as server's socket was already closed,
+	 *     probably the way to do it would be tcp-diag.
+	 */
+	sleep(TEST_RETRANSMIT_SEC);
+
+	synchronize_threads(); /* 5: restore the socket, send more data */
+	test_kill_sk(sk);
+
+	sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+	if (sk < 0)
+		test_error("socket()");
+
+	test_enable_repair(sk);
+	test_sock_restore(sk, &img, &saddr, this_ip_dest, port);
+	if (test_add_repaired_key(sk, DEFAULT_TEST_PASSWORD, 0, this_ip_dest, -1, 100, 100))
+		test_error("setsockopt(TCP_AO_ADD_KEY)");
+	test_ao_restore(sk, &ao_img);
+
+	if (test_get_tcp_ao_counters(sk, &ao1))
+		test_error("test_get_tcp_ao_counters()");
+
+	test_disable_repair(sk);
+	test_sock_state_free(&img);
+
+	/*
+	 * This is how "passive reset" is acquired in this test from TCP_TW_RST:
+	 *
+	 * IP 10.0.254.1.7011 > 10.0.1.1.59772: Flags [P.], seq 901:1001, ack 1001, win 249,
+	 *    options [tcp-ao keyid 100 rnextkeyid 100 mac 0x10217d6c36a22379086ef3b1], length 100
+	 * IP 10.0.254.1.7011 > 10.0.1.1.59772: Flags [F.], seq 1001, ack 1001, win 249,
+	 *    options [tcp-ao keyid 100 rnextkeyid 100 mac 0x104ffc99b98c10a5298cc268], length 0
+	 * IP 10.0.1.1.59772 > 10.0.254.1.7011: Flags [.], ack 1002, win 251,
+	 *    options [tcp-ao keyid 100 rnextkeyid 100 mac 0xe496dd4f7f5a8a66873c6f93,nop,nop,sack 1 {1001:1002}], length 0
+	 * IP 10.0.1.1.59772 > 10.0.254.1.7011: Flags [P.], seq 1006:1106, ack 1001, win 251,
+	 *    options [tcp-ao keyid 100 rnextkeyid 100 mac 0x1b5f3330fb23fbcd0c77d0ca], length 100
+	 * IP 10.0.254.1.7011 > 10.0.1.1.59772: Flags [R], seq 3215596252, win 0,
+	 *    options [tcp-ao keyid 100 rnextkeyid 100 mac 0x0bcfbbf497bce844312304b2], length 0
+	 */
+	err = test_client_verify(sk, packet_sz, quota / packet_sz, 2 * TEST_TIMEOUT_SEC);
+	/* Make sure that the connection was reset, not timeouted */
+	if (err && err == -ECONNRESET)
+		test_ok("client sock was passively reset post-seq-adjust");
+	else if (err)
+		test_fail("client sock was not reset post-seq-adjust: %d", err);
+	else
+		test_fail("client sock is yet connected post-seq-adjust");
+
+	if (test_get_tcp_ao_counters(sk, &ao2))
+		test_error("test_get_tcp_ao_counters()");
+
+	synchronize_threads(); /* 6: server exits */
+	close(sk);
+	test_tcp_ao_counters_cmp("client passive RST", &ao1, &ao2, TEST_CNT_GOOD);
+}
+
+static void *client_fn(void *arg)
+{
+	struct netstat *ns_before, *ns_after;
+	unsigned int port = test_server_port;
+
+	ns_before = netstat_read();
+
+	test_client_active_rst(port++);
+	test_client_passive_rst(port++);
+
+	ns_after = netstat_read();
+	netstats_check(ns_before, ns_after, "client");
+	netstat_free(ns_after);
+	netstat_free(ns_before);
+
+	synchronize_threads(); /* exit */
+	return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+	test_init(14, server_fn, client_fn);
+	return 0;
+}
diff --git a/tools/testing/selftests/net/tcp_ao/self-connect.c b/tools/testing/selftests/net/tcp_ao/self-connect.c
new file mode 100644
index 000000000000..e154d9e198a9
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/self-connect.c
@@ -0,0 +1,197 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Author: Dmitry Safonov <dima@arista.com> */
+#include <inttypes.h>
+#include "aolib.h"
+
+static union tcp_addr local_addr;
+
+static void __setup_lo_intf(const char *lo_intf,
+			    const char *addr_str, uint8_t prefix)
+{
+	if (inet_pton(TEST_FAMILY, addr_str, &local_addr) != 1)
+		test_error("Can't convert local ip address");
+
+	if (ip_addr_add(lo_intf, TEST_FAMILY, local_addr, prefix))
+		test_error("Failed to add %s ip address", lo_intf);
+
+	if (link_set_up(lo_intf))
+		test_error("Failed to bring %s up", lo_intf);
+}
+
+static void setup_lo_intf(const char *lo_intf)
+{
+#ifdef IPV6_TEST
+	__setup_lo_intf(lo_intf, "::1", 128);
+#else
+	__setup_lo_intf(lo_intf, "127.0.0.1", 8);
+#endif
+}
+
+static void tcp_self_connect(const char *tst, unsigned int port,
+			     bool different_keyids, bool check_restore)
+{
+	uint64_t before_challenge_ack, after_challenge_ack;
+	uint64_t before_syn_challenge, after_syn_challenge;
+	struct tcp_ao_counters before_ao, after_ao;
+	uint64_t before_aogood, after_aogood;
+	struct netstat *ns_before, *ns_after;
+	const size_t nr_packets = 20;
+	struct tcp_ao_repair ao_img;
+	struct tcp_sock_state img;
+	sockaddr_af addr;
+	int sk;
+
+	tcp_addr_to_sockaddr_in(&addr, &local_addr, htons(port));
+
+	sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+	if (sk < 0)
+		test_error("socket()");
+
+	if (different_keyids) {
+		if (test_add_key(sk, DEFAULT_TEST_PASSWORD, local_addr, -1, 5, 7))
+			test_error("setsockopt(TCP_AO_ADD_KEY)");
+		if (test_add_key(sk, DEFAULT_TEST_PASSWORD, local_addr, -1, 7, 5))
+			test_error("setsockopt(TCP_AO_ADD_KEY)");
+	} else {
+		if (test_add_key(sk, DEFAULT_TEST_PASSWORD, local_addr, -1, 100, 100))
+			test_error("setsockopt(TCP_AO_ADD_KEY)");
+	}
+
+	if (bind(sk, (struct sockaddr *)&addr, sizeof(addr)) < 0)
+		test_error("bind()");
+
+	ns_before = netstat_read();
+	before_aogood = netstat_get(ns_before, "TCPAOGood", NULL);
+	before_challenge_ack = netstat_get(ns_before, "TCPChallengeACK", NULL);
+	before_syn_challenge = netstat_get(ns_before, "TCPSYNChallenge", NULL);
+	if (test_get_tcp_ao_counters(sk, &before_ao))
+		test_error("test_get_tcp_ao_counters()");
+
+	if (__test_connect_socket(sk, "lo", (struct sockaddr *)&addr,
+				  sizeof(addr), TEST_TIMEOUT_SEC) < 0) {
+		ns_after = netstat_read();
+		netstat_print_diff(ns_before, ns_after);
+		test_error("failed to connect()");
+	}
+
+	if (test_client_verify(sk, 100, nr_packets, TEST_TIMEOUT_SEC)) {
+		test_fail("%s: tcp connection verify failed", tst);
+		close(sk);
+		return;
+	}
+
+	ns_after = netstat_read();
+	after_aogood = netstat_get(ns_after, "TCPAOGood", NULL);
+	after_challenge_ack = netstat_get(ns_after, "TCPChallengeACK", NULL);
+	after_syn_challenge = netstat_get(ns_after, "TCPSYNChallenge", NULL);
+	if (test_get_tcp_ao_counters(sk, &after_ao))
+		test_error("test_get_tcp_ao_counters()");
+	if (!check_restore) {
+		/* to debug: netstat_print_diff(ns_before, ns_after); */
+		netstat_free(ns_before);
+	}
+	netstat_free(ns_after);
+
+	if (after_aogood <= before_aogood) {
+		test_fail("%s: TCPAOGood counter mismatch: %zu <= %zu",
+			  tst, after_aogood, before_aogood);
+		close(sk);
+		return;
+	}
+	if (after_challenge_ack <= before_challenge_ack ||
+	    after_syn_challenge <= before_syn_challenge) {
+		/*
+		 * It's also meant to test simultaneous open, so check
+		 * these counters as well.
+		 */
+		test_fail("%s: Didn't challenge SYN or ACK: %zu <= %zu OR %zu <= %zu",
+			  tst, after_challenge_ack, before_challenge_ack,
+			  after_syn_challenge, before_syn_challenge);
+		close(sk);
+		return;
+	}
+
+	if (test_tcp_ao_counters_cmp(tst, &before_ao, &after_ao, TEST_CNT_GOOD)) {
+		close(sk);
+		return;
+	}
+
+	if (!check_restore) {
+		test_ok("%s: connect TCPAOGood %" PRIu64 " => %" PRIu64,
+				tst, before_aogood, after_aogood);
+		close(sk);
+		return;
+	}
+
+	test_enable_repair(sk);
+	test_sock_checkpoint(sk, &img, &addr);
+#ifdef IPV6_TEST
+	addr.sin6_port = htons(port + 1);
+#else
+	addr.sin_port = htons(port + 1);
+#endif
+	test_ao_checkpoint(sk, &ao_img);
+	test_kill_sk(sk);
+
+	sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+	if (sk < 0)
+		test_error("socket()");
+
+	test_enable_repair(sk);
+	__test_sock_restore(sk, "lo", &img, &addr, &addr, sizeof(addr));
+	if (different_keyids) {
+		if (test_add_repaired_key(sk, DEFAULT_TEST_PASSWORD, 0,
+					  local_addr, -1, 7, 5))
+			test_error("setsockopt(TCP_AO_ADD_KEY)");
+		if (test_add_repaired_key(sk, DEFAULT_TEST_PASSWORD, 0,
+					  local_addr, -1, 5, 7))
+			test_error("setsockopt(TCP_AO_ADD_KEY)");
+	} else {
+		if (test_add_repaired_key(sk, DEFAULT_TEST_PASSWORD, 0,
+					  local_addr, -1, 100, 100))
+			test_error("setsockopt(TCP_AO_ADD_KEY)");
+	}
+	test_ao_restore(sk, &ao_img);
+	test_disable_repair(sk);
+	test_sock_state_free(&img);
+	if (test_client_verify(sk, 100, nr_packets, TEST_TIMEOUT_SEC)) {
+		test_fail("%s: tcp connection verify failed", tst);
+		close(sk);
+		return;
+	}
+	ns_after = netstat_read();
+	after_aogood = netstat_get(ns_after, "TCPAOGood", NULL);
+	/* to debug: netstat_print_diff(ns_before, ns_after); */
+	netstat_free(ns_before);
+	netstat_free(ns_after);
+	close(sk);
+	if (after_aogood <= before_aogood) {
+		test_fail("%s: TCPAOGood counter mismatch: %zu <= %zu",
+			  tst, after_aogood, before_aogood);
+		return;
+	}
+	test_ok("%s: connect TCPAOGood %" PRIu64 " => %" PRIu64,
+			tst, before_aogood, after_aogood);
+}
+
+static void *client_fn(void *arg)
+{
+	unsigned int port = test_server_port;
+
+	setup_lo_intf("lo");
+
+	tcp_self_connect("self-connect(same keyids)", port++, false, false);
+	tcp_self_connect("self-connect(different keyids)", port++, true, false);
+	tcp_self_connect("self-connect(restore)", port, false, true);
+	port += 2;
+	tcp_self_connect("self-connect(restore, different keyids)", port, true, true);
+	port += 2;
+
+	return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+	test_init(4, client_fn, NULL);
+	return 0;
+}
diff --git a/tools/testing/selftests/net/tcp_ao/seq-ext.c b/tools/testing/selftests/net/tcp_ao/seq-ext.c
new file mode 100644
index 000000000000..ad4e77d6823e
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/seq-ext.c
@@ -0,0 +1,245 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Check that after SEQ number wrap-around:
+ * 1. SEQ-extension has upper bytes set
+ * 2. TCP conneciton is alive and no TCPAOBad segments
+ * In order to test (2), the test doesn't just adjust seq number for a queue
+ * on a connected socket, but migrates it to another sk+port number, so
+ * that there won't be any delayed packets that will fail to verify
+ * with the new SEQ numbers.
+ */
+#include <inttypes.h>
+#include "aolib.h"
+
+const unsigned int nr_packets = 1000;
+const unsigned int msg_len = 1000;
+const unsigned int quota = nr_packets * msg_len;
+unsigned int client_new_port;
+
+/* Move them closer to roll-over */
+static void test_adjust_seqs(struct tcp_sock_state *img,
+			     struct tcp_ao_repair *ao_img,
+			     bool server)
+{
+	uint32_t new_seq1, new_seq2;
+
+	/* make them roll-over during quota, but on different segments */
+	if (server) {
+		new_seq1 = ((uint32_t)-1) - msg_len;
+		new_seq2 = ((uint32_t)-1) - (quota - 2 * msg_len);
+	} else {
+		new_seq1 = ((uint32_t)-1) - (quota - 2 * msg_len);
+		new_seq2 = ((uint32_t)-1) - msg_len;
+	}
+
+	img->in.seq = new_seq1;
+	img->trw.snd_wl1 = img->in.seq - msg_len;
+	img->out.seq = new_seq2;
+	img->trw.rcv_wup = img->in.seq;
+}
+
+static int test_sk_restore(struct tcp_sock_state *img,
+			   struct tcp_ao_repair *ao_img, sockaddr_af *saddr,
+			   const union tcp_addr daddr, unsigned int dport,
+			   struct tcp_ao_counters *cnt)
+{
+	int sk;
+
+	sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+	if (sk < 0)
+		test_error("socket()");
+
+	test_enable_repair(sk);
+	test_sock_restore(sk, img, saddr, daddr, dport);
+	if (test_add_repaired_key(sk, DEFAULT_TEST_PASSWORD, 0, daddr, -1, 100, 100))
+		test_error("setsockopt(TCP_AO_ADD_KEY)");
+	test_ao_restore(sk, ao_img);
+
+	if (test_get_tcp_ao_counters(sk, cnt))
+		test_error("test_get_tcp_ao_counters()");
+
+	test_disable_repair(sk);
+	test_sock_state_free(img);
+	return sk;
+}
+
+static void *server_fn(void *arg)
+{
+	uint64_t before_good, after_good, after_bad;
+	struct tcp_ao_counters ao1, ao2;
+	struct tcp_sock_state img;
+	struct tcp_ao_repair ao_img;
+	sockaddr_af saddr;
+	ssize_t bytes;
+	int sk, lsk;
+
+	lsk = test_listen_socket(this_ip_addr, test_server_port, 1);
+
+	if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100))
+		test_error("setsockopt(TCP_AO_ADD_KEY)");
+
+	synchronize_threads(); /* 1: MKT added => connect() */
+
+	if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0))
+		test_error("test_wait_fd()");
+
+	sk = accept(lsk, NULL, NULL);
+	if (sk < 0)
+		test_error("accept()");
+
+	synchronize_threads(); /* 2: accepted => send data */
+	close(lsk);
+
+	bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC);
+	if (bytes != quota) {
+		if (bytes > 0)
+			test_fail("server served: %zd", bytes);
+		else
+			test_fail("server returned: %zd", bytes);
+		goto out;
+	}
+
+	before_good = netstat_get_one("TCPAOGood", NULL);
+
+	synchronize_threads(); /* 3: restore the connection on another port */
+
+	test_enable_repair(sk);
+	test_sock_checkpoint(sk, &img, &saddr);
+	test_ao_checkpoint(sk, &ao_img);
+	test_kill_sk(sk);
+#ifdef IPV6_TEST
+	saddr.sin6_port = htons(ntohs(saddr.sin6_port) + 1);
+#else
+	saddr.sin_port = htons(ntohs(saddr.sin_port) + 1);
+#endif
+	test_adjust_seqs(&img, &ao_img, true);
+	synchronize_threads(); /* 4: dump finished */
+	sk = test_sk_restore(&img, &ao_img, &saddr, this_ip_dest,
+			     client_new_port, &ao1);
+
+	synchronize_threads(); /* 5: verify counters during SEQ-number rollover */
+	bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC);
+	if (bytes != quota) {
+		if (bytes > 0)
+			test_fail("server served: %zd", bytes);
+		else
+			test_fail("server returned: %zd", bytes);
+	} else {
+		test_ok("server alive");
+	}
+
+	if (test_get_tcp_ao_counters(sk, &ao2))
+		test_error("test_get_tcp_ao_counters()");
+	after_good = netstat_get_one("TCPAOGood", NULL);
+
+	test_tcp_ao_counters_cmp(NULL, &ao1, &ao2, TEST_CNT_GOOD);
+
+	if (after_good <= before_good) {
+		test_fail("TCPAOGood counter did not increase: %zu <= %zu",
+			  after_good, before_good);
+	} else {
+		test_ok("TCPAOGood counter increased %zu => %zu",
+			before_good, after_good);
+	}
+	after_bad = netstat_get_one("TCPAOBad", NULL);
+	if (after_bad)
+		test_fail("TCPAOBad counter is non-zero: %zu", after_bad);
+	else
+		test_ok("TCPAOBad counter didn't increase");
+	test_enable_repair(sk);
+	test_ao_checkpoint(sk, &ao_img);
+	if (ao_img.snd_sne && ao_img.rcv_sne) {
+		test_ok("SEQ extension incremented: %u/%u",
+			ao_img.snd_sne, ao_img.rcv_sne);
+	} else {
+		test_fail("SEQ extension was not incremented: %u/%u",
+			  ao_img.snd_sne, ao_img.rcv_sne);
+	}
+
+	synchronize_threads(); /* 6: verified => closed */
+out:
+	close(sk);
+	return NULL;
+}
+
+static void *client_fn(void *arg)
+{
+	uint64_t before_good, after_good, after_bad;
+	struct tcp_ao_counters ao1, ao2;
+	struct tcp_sock_state img;
+	struct tcp_ao_repair ao_img;
+	sockaddr_af saddr;
+	int sk;
+
+	sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+	if (sk < 0)
+		test_error("socket()");
+
+	if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100))
+		test_error("setsockopt(TCP_AO_ADD_KEY)");
+
+	synchronize_threads(); /* 1: MKT added => connect() */
+	if (test_connect_socket(sk, this_ip_dest, test_server_port) <= 0)
+		test_error("failed to connect()");
+
+	synchronize_threads(); /* 2: accepted => send data */
+	if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) {
+		test_fail("pre-migrate verify failed");
+		return NULL;
+	}
+
+	before_good = netstat_get_one("TCPAOGood", NULL);
+
+	synchronize_threads(); /* 3: restore the connection on another port */
+	test_enable_repair(sk);
+	test_sock_checkpoint(sk, &img, &saddr);
+	test_ao_checkpoint(sk, &ao_img);
+	test_kill_sk(sk);
+#ifdef IPV6_TEST
+	client_new_port = ntohs(saddr.sin6_port) + 1;
+	saddr.sin6_port = htons(ntohs(saddr.sin6_port) + 1);
+#else
+	client_new_port = ntohs(saddr.sin_port) + 1;
+	saddr.sin_port = htons(ntohs(saddr.sin_port) + 1);
+#endif
+	test_adjust_seqs(&img, &ao_img, false);
+	synchronize_threads(); /* 4: dump finished */
+	sk = test_sk_restore(&img, &ao_img, &saddr, this_ip_dest,
+			     test_server_port + 1, &ao1);
+
+	synchronize_threads(); /* 5: verify counters during SEQ-number rollover */
+	if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC))
+		test_fail("post-migrate verify failed");
+	else
+		test_ok("post-migrate connection alive");
+
+	if (test_get_tcp_ao_counters(sk, &ao2))
+		test_error("test_get_tcp_ao_counters()");
+	after_good = netstat_get_one("TCPAOGood", NULL);
+
+	test_tcp_ao_counters_cmp(NULL, &ao1, &ao2, TEST_CNT_GOOD);
+
+	if (after_good <= before_good) {
+		test_fail("TCPAOGood counter did not increase: %zu <= %zu",
+			  after_good, before_good);
+	} else {
+		test_ok("TCPAOGood counter increased %zu => %zu",
+			before_good, after_good);
+	}
+	after_bad = netstat_get_one("TCPAOBad", NULL);
+	if (after_bad)
+		test_fail("TCPAOBad counter is non-zero: %zu", after_bad);
+	else
+		test_ok("TCPAOBad counter didn't increase");
+
+	synchronize_threads(); /* 6: verified => closed */
+	close(sk);
+
+	synchronize_threads(); /* don't race to exit: let server exit() */
+	return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+	test_init(7, server_fn, client_fn);
+	return 0;
+}
diff --git a/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c
new file mode 100644
index 000000000000..452de131fa3a
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c
@@ -0,0 +1,835 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Author: Dmitry Safonov <dima@arista.com> */
+#include <inttypes.h>
+#include "../../../../include/linux/kernel.h"
+#include "aolib.h"
+
+static union tcp_addr tcp_md5_client;
+
+static int test_port = 7788;
+static void make_listen(int sk)
+{
+	sockaddr_af addr;
+
+	tcp_addr_to_sockaddr_in(&addr, &this_ip_addr, htons(test_port++));
+	if (bind(sk, (struct sockaddr *)&addr, sizeof(addr)) < 0)
+		test_error("bind()");
+	if (listen(sk, 1))
+		test_error("listen()");
+}
+
+static void test_vefify_ao_info(int sk, struct tcp_ao_info_opt *info,
+				const char *tst)
+{
+	struct tcp_ao_info_opt tmp;
+	socklen_t len = sizeof(tmp);
+
+	if (getsockopt(sk, IPPROTO_TCP, TCP_AO_INFO, &tmp, &len))
+		test_error("getsockopt(TCP_AO_INFO) failed");
+
+#define __cmp_ao(member)							\
+do {										\
+	if (info->member != tmp.member) {					\
+		test_fail("%s: getsockopt(): " __stringify(member) " %zu != %zu",	\
+			  tst, (size_t)info->member, (size_t)tmp.member);	\
+		return;								\
+	}									\
+} while(0)
+	if (info->set_current)
+		__cmp_ao(current_key);
+	if (info->set_rnext)
+		__cmp_ao(rnext);
+	if (info->set_counters) {
+		__cmp_ao(pkt_good);
+		__cmp_ao(pkt_bad);
+		__cmp_ao(pkt_key_not_found);
+		__cmp_ao(pkt_ao_required);
+		__cmp_ao(pkt_dropped_icmp);
+	}
+	__cmp_ao(ao_required);
+	__cmp_ao(accept_icmps);
+
+	test_ok("AO info get: %s", tst);
+#undef __cmp_ao
+}
+
+static void __setsockopt_checked(int sk, int optname, bool get,
+				 void *optval, socklen_t *len,
+				 int err, const char *tst, const char *tst2)
+{
+	int ret;
+
+	if (!tst)
+		tst = "";
+	if (!tst2)
+		tst2 = "";
+
+	errno = 0;
+	if (get)
+		ret = getsockopt(sk, IPPROTO_TCP, optname, optval, len);
+	else
+		ret = setsockopt(sk, IPPROTO_TCP, optname, optval, *len);
+	if (ret == -1) {
+		if (errno == err)
+			test_ok("%s%s", tst ?: "", tst2 ?: "");
+		else
+			test_fail("%s%s: %setsockopt() failed",
+				  tst, tst2, get ? "g" : "s");
+		close(sk);
+		return;
+	}
+
+	if (err) {
+		test_fail("%s%s: %setsockopt() was expected to fail with %d",
+			  tst, tst2, get ? "g" : "s", err);
+	} else {
+		test_ok("%s%s", tst ?: "", tst2 ?: "");
+		if (optname == TCP_AO_ADD_KEY) {
+			test_verify_socket_key(sk, optval);
+		} else if (optname == TCP_AO_INFO && !get) {
+			test_vefify_ao_info(sk, optval, tst2);
+		} else if (optname == TCP_AO_GET_KEYS) {
+			if (*len != sizeof(struct tcp_ao_getsockopt))
+				test_fail("%s%s: get keys returned wrong tcp_ao_getsockopt size",
+					  tst, tst2);
+		}
+	}
+	close(sk);
+}
+
+static void setsockopt_checked(int sk, int optname, void *optval,
+			       int err, const char *tst)
+{
+	const char *cmd = NULL;
+	socklen_t len;
+
+	switch (optname) {
+	case TCP_AO_ADD_KEY:
+		cmd = "key add: ";
+		len = sizeof(struct tcp_ao_add);
+		break;
+	case TCP_AO_DEL_KEY:
+		cmd = "key del: ";
+		len = sizeof(struct tcp_ao_del);
+		break;
+	case TCP_AO_INFO:
+		cmd = "AO info set: ";
+		len = sizeof(struct tcp_ao_info_opt);
+		break;
+	default:
+		break;
+	}
+
+	__setsockopt_checked(sk, optname, false, optval, &len, err, cmd, tst);
+}
+
+static int prepare_defs(int cmd, void *optval)
+{
+	int sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+
+	if (sk < 0)
+		test_error("socket()");
+
+	switch (cmd) {
+	case TCP_AO_ADD_KEY: {
+		struct tcp_ao_add *add = optval;
+
+		if (test_prepare_def_key(add, DEFAULT_TEST_PASSWORD, 0, this_ip_dest,
+					-1, 0, 100, 100))
+			test_error("prepare default tcp_ao_add");
+		break;
+		}
+	case TCP_AO_DEL_KEY: {
+		struct tcp_ao_del *del = optval;
+
+		if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest,
+				 DEFAULT_TEST_PREFIX, 100, 100))
+			test_error("add default key");
+		memset(del, 0, sizeof(struct tcp_ao_del));
+		del->sndid = 100;
+		del->rcvid = 100;
+		del->prefix = DEFAULT_TEST_PREFIX;
+		tcp_addr_to_sockaddr_in(&del->addr, &this_ip_dest, 0);
+		break;
+		}
+	case TCP_AO_INFO: {
+		struct tcp_ao_info_opt *info = optval;
+
+		if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest,
+				 DEFAULT_TEST_PREFIX, 100, 100))
+			test_error("add default key");
+		memset(info, 0, sizeof(struct tcp_ao_info_opt));
+		break;
+		}
+	case TCP_AO_GET_KEYS: {
+		struct tcp_ao_getsockopt *get = optval;
+
+		if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest,
+				 DEFAULT_TEST_PREFIX, 100, 100))
+			test_error("add default key");
+		memset(get, 0, sizeof(struct tcp_ao_getsockopt));
+		get->nkeys = 1;
+		get->get_all = 1;
+		break;
+		}
+	default:
+		test_error("unknown cmd");
+	}
+
+	return sk;
+}
+
+static void test_extend(int cmd, bool get, const char *tst, socklen_t under_size)
+{
+	struct {
+		union {
+			struct tcp_ao_add add;
+			struct tcp_ao_del del;
+			struct tcp_ao_getsockopt get;
+			struct tcp_ao_info_opt info;
+		};
+		char *extend[100];
+	} tmp_opt;
+	socklen_t extended_size = sizeof(tmp_opt);
+	int sk;
+
+	memset(&tmp_opt, 0, sizeof(tmp_opt));
+	sk = prepare_defs(cmd, &tmp_opt);
+	__setsockopt_checked(sk, cmd, get, &tmp_opt, &under_size,
+			     EINVAL, tst, ": minimum size");
+
+	memset(&tmp_opt, 0, sizeof(tmp_opt));
+	sk = prepare_defs(cmd, &tmp_opt);
+	__setsockopt_checked(sk, cmd, get, &tmp_opt, &extended_size,
+			     0, tst, ": extended size");
+
+	memset(&tmp_opt, 0, sizeof(tmp_opt));
+	sk = prepare_defs(cmd, &tmp_opt);
+	__setsockopt_checked(sk, cmd, get, NULL, &extended_size,
+			     EFAULT, tst, ": null optval");
+
+	if (get) {
+		memset(&tmp_opt, 0, sizeof(tmp_opt));
+		sk = prepare_defs(cmd, &tmp_opt);
+		__setsockopt_checked(sk, cmd, get, &tmp_opt, NULL,
+				     EFAULT, tst, ": null optlen");
+	}
+}
+
+static void extend_tests(void)
+{
+	test_extend(TCP_AO_ADD_KEY, false, "AO add",
+		    offsetof(struct tcp_ao_add, key));
+	test_extend(TCP_AO_DEL_KEY, false, "AO del",
+		    offsetof(struct tcp_ao_del, keyflags));
+	test_extend(TCP_AO_INFO, false, "AO set info",
+		    offsetof(struct tcp_ao_info_opt, pkt_dropped_icmp));
+	test_extend(TCP_AO_INFO, true, "AO get info", -1);
+	test_extend(TCP_AO_GET_KEYS, true, "AO get keys", -1);
+}
+
+static void test_optmem_limit(void)
+{
+	size_t i, keys_limit, current_optmem = test_get_optmem();
+	struct tcp_ao_add ao;
+	union tcp_addr net = {};
+	int sk;
+
+	if (inet_pton(TEST_FAMILY, TEST_NETWORK, &net) != 1)
+		test_error("Can't convert ip address %s", TEST_NETWORK);
+
+	sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+	keys_limit = current_optmem / KERNEL_TCP_AO_KEY_SZ_ROUND_UP;
+	for (i = 0;; i++) {
+		union tcp_addr key_peer;
+		int err;
+
+		key_peer = gen_tcp_addr(net, i + 1);
+		tcp_addr_to_sockaddr_in(&ao.addr, &key_peer, 0);
+		err = setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY,
+				 &ao, sizeof(ao));
+		if (!err) {
+			/*
+			 * TCP_AO_ADD_KEY should be the same order as the real
+			 * sizeof(struct tcp_ao_key) in kernel.
+			 */
+			if (i <= keys_limit * 10)
+				continue;
+			test_fail("optmem limit test failed: added %zu key", i);
+			break;
+		}
+		if (i < keys_limit) {
+			test_fail("optmem limit test failed: couldn't add %zu key", i);
+			break;
+		}
+		test_ok("optmem limit was hit on adding %zu key", i);
+		break;
+	}
+	close(sk);
+}
+
+static void test_einval_add_key(void)
+{
+	struct tcp_ao_add ao;
+	int sk;
+
+	sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+	ao.keylen = TCP_AO_MAXKEYLEN + 1;
+	setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "too big keylen");
+
+	sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+	ao.reserved = 1;
+	setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "using reserved padding");
+
+	sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+	ao.reserved2 = 1;
+	setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "using reserved2 padding");
+
+	/* tcp_ao_verify_ipv{4,6}() checks */
+	sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+	ao.addr.ss_family = AF_UNIX;
+	memcpy(&ao.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY));
+	setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "wrong address family");
+
+	sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+	tcp_addr_to_sockaddr_in(&ao.addr, &this_ip_dest, 1234);
+	setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "port (unsupported)");
+
+	sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+	ao.prefix = 0;
+	setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "no prefix, addr");
+
+	sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+	ao.prefix = 0;
+	memcpy(&ao.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY));
+	setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, 0, "no prefix, any addr");
+
+	sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+	ao.prefix = 32;
+	memcpy(&ao.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY));
+	setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "prefix, any addr");
+
+	sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+	ao.prefix = 129;
+	setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "too big prefix");
+
+	sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+	ao.prefix = 2;
+	setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "too short prefix");
+
+	sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+	ao.keyflags = (uint8_t)(-1);
+	setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "bad key flags");
+
+	sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+	make_listen(sk);
+	ao.set_current = 1;
+	setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "add current key on a listen socket");
+
+	sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+	make_listen(sk);
+	ao.set_rnext = 1;
+	setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "add rnext key on a listen socket");
+
+	sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+	make_listen(sk);
+	ao.set_current = 1;
+	ao.set_rnext = 1;
+	setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "add current+rnext key on a listen socket");
+
+	sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+	ao.set_current = 1;
+	setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, 0, "add key and set as current");
+
+	sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+	ao.set_rnext = 1;
+	setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, 0, "add key and set as rnext");
+
+	sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+	ao.set_current = 1;
+	ao.set_rnext = 1;
+	setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, 0, "add key and set as current+rnext");
+
+	sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+	ao.ifindex = 42;
+	setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL,
+			   "ifindex without TCP_AO_KEYF_IFNINDEX");
+
+	sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+	ao.keyflags |= TCP_AO_KEYF_IFINDEX;
+	ao.ifindex = 42;
+	setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "non-existent VRF");
+	/*
+	 * tcp_md5_do_lookup{,_any_l3index}() are checked in unsigned-md5
+	 * see client_vrf_tests().
+	 */
+
+	test_optmem_limit();
+
+	/* tcp_ao_parse_crypto() */
+	sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+	ao.maclen = 100;
+	setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EMSGSIZE, "maclen bigger than TCP hdr");
+
+	sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+	strcpy(ao.alg_name, "imaginary hash algo");
+	setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, ENOENT, "bad algo");
+}
+
+static void test_einval_del_key(void)
+{
+	struct tcp_ao_del del;
+	int sk;
+
+	sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+	del.reserved = 1;
+	setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "using reserved padding");
+
+	sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+	del.reserved2 = 1;
+	setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "using reserved2 padding");
+
+	sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+	make_listen(sk);
+	if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, DEFAULT_TEST_PREFIX, 0, 0))
+		test_error("add key");
+	del.set_current = 1;
+	setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "del and set current key on a listen socket");
+
+	sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+	make_listen(sk);
+	if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, DEFAULT_TEST_PREFIX, 0, 0))
+		test_error("add key");
+	del.set_rnext = 1;
+	setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "del and set rnext key on a listen socket");
+
+	sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+	make_listen(sk);
+	if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, DEFAULT_TEST_PREFIX, 0, 0))
+		test_error("add key");
+	del.set_current = 1;
+	del.set_rnext = 1;
+	setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "del and set current+rnext key on a listen socket");
+
+	sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+	del.keyflags = (uint8_t)(-1);
+	setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "bad key flags");
+
+	sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+	del.ifindex = 42;
+	setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL,
+			   "ifindex without TCP_AO_KEYF_IFNINDEX");
+
+	sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+	del.keyflags |= TCP_AO_KEYF_IFINDEX;
+	del.ifindex = 42;
+	setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "non-existent VRF");
+
+	sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+	del.set_current = 1;
+	setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "set non-existing current key");
+
+	sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+	del.set_rnext = 1;
+	setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "set non-existing rnext key");
+
+	sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+	del.set_current = 1;
+	del.set_rnext = 1;
+	setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "set non-existing current+rnext key");
+
+	sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+	if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, DEFAULT_TEST_PREFIX, 0, 0))
+		test_error("add key");
+	del.set_current = 1;
+	setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, 0, "set current key");
+
+	sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+	if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, DEFAULT_TEST_PREFIX, 0, 0))
+		test_error("add key");
+	del.set_rnext = 1;
+	setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, 0, "set rnext key");
+
+	sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+	if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, DEFAULT_TEST_PREFIX, 0, 0))
+		test_error("add key");
+	del.set_current = 1;
+	del.set_rnext = 1;
+	setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, 0, "set current+rnext key");
+
+	sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+	del.set_current = 1;
+	del.current_key = 100;
+	setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "set as current key to be removed");
+
+	sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+	del.set_rnext = 1;
+	del.rnext = 100;
+	setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "set as rnext key to be removed");
+
+	sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+	del.set_current = 1;
+	del.current_key = 100;
+	del.set_rnext = 1;
+	del.rnext = 100;
+	setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "set as current+rnext key to be removed");
+
+	sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+	del.del_async = 1;
+	setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "async on non-listen");
+
+	sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+	del.sndid = 101;
+	setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "non-existing sndid");
+
+	sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+	del.rcvid = 101;
+	setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "non-existing rcvid");
+
+	sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+	tcp_addr_to_sockaddr_in(&del.addr, &this_ip_addr, 0);
+	setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "incorrect addr");
+
+	sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+	setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, 0, "correct key delete");
+}
+
+static void test_einval_ao_info(void)
+{
+	struct tcp_ao_info_opt info;
+	int sk;
+
+	sk = prepare_defs(TCP_AO_INFO, &info);
+	make_listen(sk);
+	info.set_current = 1;
+	setsockopt_checked(sk, TCP_AO_INFO, &info, EINVAL, "set current key on a listen socket");
+
+	sk = prepare_defs(TCP_AO_INFO, &info);
+	make_listen(sk);
+	info.set_rnext = 1;
+	setsockopt_checked(sk, TCP_AO_INFO, &info, EINVAL, "set rnext key on a listen socket");
+
+	sk = prepare_defs(TCP_AO_INFO, &info);
+	make_listen(sk);
+	info.set_current = 1;
+	info.set_rnext = 1;
+	setsockopt_checked(sk, TCP_AO_INFO, &info, EINVAL, "set current+rnext key on a listen socket");
+
+	sk = prepare_defs(TCP_AO_INFO, &info);
+	info.reserved = 1;
+	setsockopt_checked(sk, TCP_AO_INFO, &info, EINVAL, "using reserved padding");
+
+	sk = prepare_defs(TCP_AO_INFO, &info);
+	info.reserved2 = 1;
+	setsockopt_checked(sk, TCP_AO_INFO, &info, EINVAL, "using reserved2 padding");
+
+	sk = prepare_defs(TCP_AO_INFO, &info);
+	info.accept_icmps = 1;
+	setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "accept_icmps");
+
+	sk = prepare_defs(TCP_AO_INFO, &info);
+	info.ao_required = 1;
+	setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "ao required");
+
+	if (!should_skip_test("ao required with MD5 key", KCONFIG_TCP_MD5)) {
+		sk = prepare_defs(TCP_AO_INFO, &info);
+		info.ao_required = 1;
+		if (test_set_md5(sk, tcp_md5_client, TEST_PREFIX, -1,
+				 "long long secret")) {
+			test_error("setsockopt(TCP_MD5SIG_EXT)");
+			close(sk);
+		} else {
+			setsockopt_checked(sk, TCP_AO_INFO, &info, EKEYREJECTED,
+					   "ao required with MD5 key");
+		}
+	}
+
+	sk = prepare_defs(TCP_AO_INFO, &info);
+	info.set_current = 1;
+	setsockopt_checked(sk, TCP_AO_INFO, &info, ENOENT, "set non-existing current key");
+
+	sk = prepare_defs(TCP_AO_INFO, &info);
+	info.set_rnext = 1;
+	setsockopt_checked(sk, TCP_AO_INFO, &info, ENOENT, "set non-existing rnext key");
+
+	sk = prepare_defs(TCP_AO_INFO, &info);
+	info.set_current = 1;
+	info.set_rnext = 1;
+	setsockopt_checked(sk, TCP_AO_INFO, &info, ENOENT, "set non-existing current+rnext key");
+
+	sk = prepare_defs(TCP_AO_INFO, &info);
+	info.set_current = 1;
+	info.current_key = 100;
+	setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "set current key");
+
+	sk = prepare_defs(TCP_AO_INFO, &info);
+	info.set_rnext = 1;
+	info.rnext = 100;
+	setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "set rnext key");
+
+	sk = prepare_defs(TCP_AO_INFO, &info);
+	info.set_current = 1;
+	info.set_rnext = 1;
+	info.current_key = 100;
+	info.rnext = 100;
+	setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "set current+rnext key");
+
+	sk = prepare_defs(TCP_AO_INFO, &info);
+	info.set_counters = 1;
+	info.pkt_good = 321;
+	info.pkt_bad = 888;
+	info.pkt_key_not_found = 654;
+	info.pkt_ao_required = 987654;
+	info.pkt_dropped_icmp = 10000;
+	setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "set counters");
+
+	sk = prepare_defs(TCP_AO_INFO, &info);
+	setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "no-op");
+}
+
+static void getsockopt_checked(int sk, struct tcp_ao_getsockopt *optval,
+			       int err, const char *tst)
+{
+	socklen_t len = sizeof(struct tcp_ao_getsockopt);
+
+	__setsockopt_checked(sk, TCP_AO_GET_KEYS, true, optval, &len, err,
+			     "get keys: ", tst);
+}
+
+static void test_einval_get_keys(void)
+{
+	struct tcp_ao_getsockopt out;
+	int sk;
+
+	sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+	if (sk < 0)
+		test_error("socket()");
+	getsockopt_checked(sk, &out, ENOENT, "no ao_info");
+
+	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+	getsockopt_checked(sk, &out, 0, "proper tcp_ao_get_mkts()");
+
+	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+	out.pkt_good = 643;
+	getsockopt_checked(sk, &out, EINVAL, "set out-only pkt_good counter");
+
+	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+	out.pkt_bad = 94;
+	getsockopt_checked(sk, &out, EINVAL, "set out-only pkt_bad counter");
+
+	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+	out.keyflags = (uint8_t)(-1);
+	getsockopt_checked(sk, &out, EINVAL, "bad keyflags");
+
+	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+	out.ifindex = 42;
+	getsockopt_checked(sk, &out, EINVAL,
+			   "ifindex without TCP_AO_KEYF_IFNINDEX");
+
+	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+	out.reserved = 1;
+	getsockopt_checked(sk, &out, EINVAL, "using reserved field");
+
+	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+	out.get_all = 0;
+	out.prefix = 0;
+	tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0);
+	getsockopt_checked(sk, &out, EINVAL, "no prefix, addr");
+
+	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+	out.get_all = 0;
+	out.prefix = 0;
+	memcpy(&out.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY));
+	getsockopt_checked(sk, &out, 0, "no prefix, any addr");
+
+	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+	out.get_all = 0;
+	out.prefix = 32;
+	memcpy(&out.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY));
+	getsockopt_checked(sk, &out, EINVAL, "prefix, any addr");
+
+	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+	out.get_all = 0;
+	out.prefix = 129;
+	tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0);
+	getsockopt_checked(sk, &out, EINVAL, "too big prefix");
+
+	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+	out.get_all = 0;
+	out.prefix = 2;
+	tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0);
+	getsockopt_checked(sk, &out, EINVAL, "too short prefix");
+
+	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+	out.get_all = 0;
+	out.prefix = DEFAULT_TEST_PREFIX;
+	tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0);
+	getsockopt_checked(sk, &out, 0, "prefix + addr");
+
+	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+	out.get_all = 1;
+	out.prefix = DEFAULT_TEST_PREFIX;
+	getsockopt_checked(sk, &out, EINVAL, "get_all + prefix");
+
+	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+	out.get_all = 1;
+	tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0);
+	getsockopt_checked(sk, &out, EINVAL, "get_all + addr");
+
+	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+	out.get_all = 1;
+	out.sndid = 1;
+	getsockopt_checked(sk, &out, EINVAL, "get_all + sndid");
+
+	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+	out.get_all = 1;
+	out.rcvid = 1;
+	getsockopt_checked(sk, &out, EINVAL, "get_all + rcvid");
+
+	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+	out.get_all = 0;
+	out.is_current = 1;
+	out.prefix = DEFAULT_TEST_PREFIX;
+	getsockopt_checked(sk, &out, EINVAL, "current + prefix");
+
+	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+	out.get_all = 0;
+	out.is_current = 1;
+	tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0);
+	getsockopt_checked(sk, &out, EINVAL, "current + addr");
+
+	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+	out.get_all = 0;
+	out.is_current = 1;
+	out.sndid = 1;
+	getsockopt_checked(sk, &out, EINVAL, "current + sndid");
+
+	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+	out.get_all = 0;
+	out.is_current = 1;
+	out.rcvid = 1;
+	getsockopt_checked(sk, &out, EINVAL, "current + rcvid");
+
+	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+	out.get_all = 0;
+	out.is_rnext = 1;
+	out.prefix = DEFAULT_TEST_PREFIX;
+	getsockopt_checked(sk, &out, EINVAL, "rnext + prefix");
+
+	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+	out.get_all = 0;
+	out.is_rnext = 1;
+	tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0);
+	getsockopt_checked(sk, &out, EINVAL, "rnext + addr");
+
+	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+	out.get_all = 0;
+	out.is_rnext = 1;
+	out.sndid = 1;
+	getsockopt_checked(sk, &out, EINVAL, "rnext + sndid");
+
+	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+	out.get_all = 0;
+	out.is_rnext = 1;
+	out.rcvid = 1;
+	getsockopt_checked(sk, &out, EINVAL, "rnext + rcvid");
+
+	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+	out.get_all = 1;
+	out.is_current = 1;
+	getsockopt_checked(sk, &out, EINVAL, "get_all + current");
+
+	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+	out.get_all = 1;
+	out.is_rnext = 1;
+	getsockopt_checked(sk, &out, EINVAL, "get_all + rnext");
+
+	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+	out.get_all = 0;
+	out.is_current = 1;
+	out.is_rnext = 1;
+	getsockopt_checked(sk, &out, 0, "current + rnext");
+}
+
+static void einval_tests(void)
+{
+	test_einval_add_key();
+	test_einval_del_key();
+	test_einval_ao_info();
+	test_einval_get_keys();
+}
+
+static void duplicate_tests(void)
+{
+	union tcp_addr network_dup;
+	struct tcp_ao_add ao, ao2;
+	int sk;
+
+	sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+	if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao, sizeof(ao)))
+		test_error("setsockopt()");
+	setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: full copy");
+
+	sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+	ao2 = ao;
+	memcpy(&ao2.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY));
+	ao2.prefix = 0;
+	if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao2, sizeof(ao)))
+		test_error("setsockopt()");
+	setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: any addr key on the socket");
+
+	sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+	if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao, sizeof(ao)))
+		test_error("setsockopt()");
+	memcpy(&ao.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY));
+	ao.prefix = 0;
+	setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: add any addr key");
+
+	if (inet_pton(TEST_FAMILY, TEST_NETWORK, &network_dup) != 1)
+		test_error("Can't convert ip address %s", TEST_NETWORK);
+	sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+	if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao, sizeof(ao)))
+		test_error("setsockopt()");
+	if (test_prepare_def_key(&ao, "password", 0, network_dup,
+				 16, 0, 100, 100))
+		test_error("prepare default tcp_ao_add");
+	setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: add any addr for the same subnet");
+
+	sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+	if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao, sizeof(ao)))
+		test_error("setsockopt()");
+	setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: full copy of a key");
+
+	sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+	if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao, sizeof(ao)))
+		test_error("setsockopt()");
+	ao.rcvid = 101;
+	setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: RecvID differs");
+
+	sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+	if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao, sizeof(ao)))
+		test_error("setsockopt()");
+	ao.sndid = 101;
+	setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: SendID differs");
+}
+
+static void *client_fn(void *arg)
+{
+	if (inet_pton(TEST_FAMILY, __TEST_CLIENT_IP(2), &tcp_md5_client) != 1)
+		test_error("Can't convert ip address");
+	extend_tests();
+	einval_tests();
+	duplicate_tests();
+	/*
+	 * TODO: check getsockopt(TCP_AO_GET_KEYS) with different filters
+	 * returning proper nr & keys;
+	 */
+
+	return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+	test_init(120, client_fn, NULL);
+	return 0;
+}
diff --git a/tools/testing/selftests/net/tcp_ao/settings b/tools/testing/selftests/net/tcp_ao/settings
new file mode 100644
index 000000000000..6091b45d226b
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/settings
@@ -0,0 +1 @@
+timeout=120
diff --git a/tools/testing/selftests/net/tcp_ao/unsigned-md5.c b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c
new file mode 100644
index 000000000000..6b59a652159f
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c
@@ -0,0 +1,741 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Author: Dmitry Safonov <dima@arista.com> */
+#include <inttypes.h>
+#include "aolib.h"
+
+#define fault(type)	(inj == FAULT_ ## type)
+static const char *md5_password = "Some evil genius, enemy to mankind, must have been the first contriver.";
+static const char *ao_password = DEFAULT_TEST_PASSWORD;
+
+static union tcp_addr client2;
+static union tcp_addr client3;
+
+static const int test_vrf_ifindex = 200;
+static const uint8_t test_vrf_tabid = 42;
+static void setup_vrfs(void)
+{
+	int err;
+
+	if (!kernel_config_has(KCONFIG_NET_VRF))
+		return;
+
+	err = add_vrf("ksft-vrf", test_vrf_tabid, test_vrf_ifindex, -1);
+	if (err)
+		test_error("Failed to add a VRF: %d", err);
+
+	err = link_set_up("ksft-vrf");
+	if (err)
+		test_error("Failed to bring up a VRF");
+
+	err = ip_route_add_vrf(veth_name, TEST_FAMILY,
+			       this_ip_addr, this_ip_dest, test_vrf_tabid);
+	if (err)
+		test_error("Failed to add a route to VRF: %d", err);
+}
+
+static void try_accept(const char *tst_name, unsigned int port,
+		       union tcp_addr *md5_addr, uint8_t md5_prefix,
+		       union tcp_addr *ao_addr, uint8_t ao_prefix,
+		       bool set_ao_required,
+		       uint8_t sndid, uint8_t rcvid, uint8_t vrf,
+		       const char *cnt_name, test_cnt cnt_expected,
+		       int needs_tcp_md5, fault_t inj)
+{
+	struct tcp_ao_counters ao_cnt1, ao_cnt2;
+	uint64_t before_cnt = 0, after_cnt = 0; /* silence GCC */
+	int lsk, err, sk = 0;
+	time_t timeout;
+
+	if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5))
+		return;
+
+	lsk = test_listen_socket(this_ip_addr, port, 1);
+
+	if (md5_addr && test_set_md5(lsk, *md5_addr, md5_prefix, -1, md5_password))
+		test_error("setsockopt(TCP_MD5SIG_EXT)");
+
+	if (ao_addr && test_add_key(lsk, ao_password,
+				    *ao_addr, ao_prefix, sndid, rcvid))
+		test_error("setsockopt(TCP_AO_ADD_KEY)");
+
+	if (set_ao_required && test_set_ao_flags(lsk, true, false))
+		test_error("setsockopt(TCP_AO_INFO)");
+
+	if (cnt_name)
+		before_cnt = netstat_get_one(cnt_name, NULL);
+	if (ao_addr && test_get_tcp_ao_counters(lsk, &ao_cnt1))
+		test_error("test_get_tcp_ao_counters()");
+
+	synchronize_threads(); /* preparations done */
+
+	timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
+	err = test_wait_fd(lsk, timeout, 0);
+	if (err == -ETIMEDOUT) {
+		if (!fault(TIMEOUT))
+			test_fail("timed out for accept()");
+	} else if (err < 0) {
+		test_error("test_wait_fd()");
+	} else {
+		if (fault(TIMEOUT))
+			test_fail("ready to accept");
+
+		sk = accept(lsk, NULL, NULL);
+		if (sk < 0) {
+			test_error("accept()");
+		} else {
+			if (fault(TIMEOUT))
+				test_fail("%s: accepted", tst_name);
+		}
+	}
+
+	if (ao_addr && test_get_tcp_ao_counters(lsk, &ao_cnt2))
+		test_error("test_get_tcp_ao_counters()");
+	close(lsk);
+
+	if (!cnt_name) {
+		test_ok("%s: no counter checks", tst_name);
+		goto out;
+	}
+
+	after_cnt = netstat_get_one(cnt_name, NULL);
+
+	if (after_cnt <= before_cnt) {
+		test_fail("%s: %s counter did not increase: %zu <= %zu",
+				tst_name, cnt_name, after_cnt, before_cnt);
+	} else {
+		test_ok("%s: counter %s increased %zu => %zu",
+			tst_name, cnt_name, before_cnt, after_cnt);
+	}
+	if (ao_addr)
+		test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected);
+
+out:
+	synchronize_threads(); /* test_kill_sk() */
+	if (sk > 0)
+		test_kill_sk(sk);
+}
+
+static void server_add_routes(void)
+{
+	int family = TEST_FAMILY;
+
+	synchronize_threads(); /* client_add_ips() */
+
+	if (ip_route_add(veth_name, family, this_ip_addr, client2))
+		test_error("Failed to add route");
+	if (ip_route_add(veth_name, family, this_ip_addr, client3))
+		test_error("Failed to add route");
+}
+
+static void server_add_fail_tests(unsigned int *port)
+{
+	union tcp_addr addr_any = {};
+
+	try_accept("TCP-AO established: add TCP-MD5 key", (*port)++, NULL, 0,
+		   &addr_any, 0, 0, 100, 100, 0, "TCPAOGood", TEST_CNT_GOOD,
+		   1, 0);
+	try_accept("TCP-MD5 established: add TCP-AO key", (*port)++, &addr_any,
+		   0, NULL, 0, 0, 0, 0, 0, NULL, 0, 1, 0);
+	try_accept("non-signed established: add TCP-AO key", (*port)++, NULL, 0,
+		   NULL, 0, 0, 0, 0, 0, "CurrEstab", 0, 0, 0);
+}
+
+static void server_vrf_tests(unsigned int *port)
+{
+	setup_vrfs();
+}
+
+static void *server_fn(void *arg)
+{
+	unsigned int port = test_server_port;
+	union tcp_addr addr_any = {};
+
+	server_add_routes();
+
+	try_accept("AO server (INADDR_ANY): AO client", port++, NULL, 0,
+		   &addr_any, 0, 0, 100, 100, 0, "TCPAOGood",
+		   TEST_CNT_GOOD, 0, 0);
+	try_accept("AO server (INADDR_ANY): MD5 client", port++, NULL, 0,
+		   &addr_any, 0, 0, 100, 100, 0, "TCPMD5Unexpected",
+		   0, 1, FAULT_TIMEOUT);
+	try_accept("AO server (INADDR_ANY): no sign client", port++, NULL, 0,
+		   &addr_any, 0, 0, 100, 100, 0, "TCPAORequired",
+		   TEST_CNT_AO_REQUIRED, 0, FAULT_TIMEOUT);
+	try_accept("AO server (AO_REQUIRED): AO client", port++, NULL, 0,
+		   &this_ip_dest, TEST_PREFIX, true,
+		   100, 100, 0, "TCPAOGood", TEST_CNT_GOOD, 0, 0);
+	try_accept("AO server (AO_REQUIRED): unsigned client", port++, NULL, 0,
+		   &this_ip_dest, TEST_PREFIX, true,
+		   100, 100, 0, "TCPAORequired",
+		   TEST_CNT_AO_REQUIRED, 0, FAULT_TIMEOUT);
+
+	try_accept("MD5 server (INADDR_ANY): AO client", port++, &addr_any, 0,
+		   NULL, 0, 0, 0, 0, 0, "TCPAOKeyNotFound",
+		   0, 1, FAULT_TIMEOUT);
+	try_accept("MD5 server (INADDR_ANY): MD5 client", port++, &addr_any, 0,
+		   NULL, 0, 0, 0, 0, 0, NULL, 0, 1, 0);
+	try_accept("MD5 server (INADDR_ANY): no sign client", port++, &addr_any,
+		   0, NULL, 0, 0, 0, 0, 0, "TCPMD5NotFound",
+		   0, 1, FAULT_TIMEOUT);
+
+	try_accept("no sign server: AO client", port++, NULL, 0,
+		   NULL, 0, 0, 0, 0, 0, "TCPAOKeyNotFound",
+		   TEST_CNT_AO_KEY_NOT_FOUND, 0, FAULT_TIMEOUT);
+	try_accept("no sign server: MD5 client", port++, NULL, 0,
+		   NULL, 0, 0, 0, 0, 0, "TCPMD5Unexpected",
+		   0, 1, FAULT_TIMEOUT);
+	try_accept("no sign server: no sign client", port++, NULL, 0,
+		   NULL, 0, 0, 0, 0, 0, "CurrEstab", 0, 0, 0);
+
+	try_accept("AO+MD5 server: AO client (matching)", port++,
+		   &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
+		   100, 100, 0, "TCPAOGood", TEST_CNT_GOOD, 1, 0);
+	try_accept("AO+MD5 server: AO client (misconfig, matching MD5)", port++,
+		   &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
+		   100, 100, 0, "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND,
+		   1, FAULT_TIMEOUT);
+	try_accept("AO+MD5 server: AO client (misconfig, non-matching)", port++,
+		   &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
+		   100, 100, 0, "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND,
+		   1, FAULT_TIMEOUT);
+	try_accept("AO+MD5 server: MD5 client (matching)", port++,
+		   &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
+		   100, 100, 0, NULL, 0, 1, 0);
+	try_accept("AO+MD5 server: MD5 client (misconfig, matching AO)", port++,
+		   &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
+		   100, 100, 0, "TCPMD5Unexpected", 0, 1, FAULT_TIMEOUT);
+	try_accept("AO+MD5 server: MD5 client (misconfig, non-matching)", port++,
+		   &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
+		   100, 100, 0, "TCPMD5Unexpected", 0, 1, FAULT_TIMEOUT);
+	try_accept("AO+MD5 server: no sign client (unmatched)", port++,
+		   &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
+		   100, 100, 0, "CurrEstab", 0, 1, 0);
+	try_accept("AO+MD5 server: no sign client (misconfig, matching AO)",
+		   port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
+		   100, 100, 0, "TCPAORequired",
+		   TEST_CNT_AO_REQUIRED, 1, FAULT_TIMEOUT);
+	try_accept("AO+MD5 server: no sign client (misconfig, matching MD5)",
+		   port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
+		   100, 100, 0, "TCPMD5NotFound", 0, 1, FAULT_TIMEOUT);
+
+	try_accept("AO+MD5 server: client with both [TCP-MD5] and TCP-AO keys",
+		   port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
+		   100, 100, 0, NULL, 0, 1, FAULT_TIMEOUT);
+	try_accept("AO+MD5 server: client with both TCP-MD5 and [TCP-AO] keys",
+		   port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
+		   100, 100, 0, NULL, 0, 1, FAULT_TIMEOUT);
+
+	server_add_fail_tests(&port);
+
+	server_vrf_tests(&port);
+
+	/* client exits */
+	synchronize_threads();
+	return NULL;
+}
+
+static int client_bind(int sk, union tcp_addr bind_addr)
+{
+#ifdef IPV6_TEST
+	struct sockaddr_in6 addr = {
+		.sin6_family	= AF_INET6,
+		.sin6_port	= 0,
+		.sin6_addr	= bind_addr.a6,
+	};
+#else
+	struct sockaddr_in addr = {
+		.sin_family	= AF_INET,
+		.sin_port	= 0,
+		.sin_addr	= bind_addr.a4,
+	};
+#endif
+	return bind(sk, &addr, sizeof(addr));
+}
+
+static void try_connect(const char *tst_name, unsigned int port,
+		       union tcp_addr *md5_addr, uint8_t md5_prefix,
+		       union tcp_addr *ao_addr, uint8_t ao_prefix,
+		       uint8_t sndid, uint8_t rcvid, uint8_t vrf,
+		       fault_t inj, int needs_tcp_md5, union tcp_addr *bind_addr)
+{
+	time_t timeout;
+	int sk, ret;
+
+	if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5))
+		return;
+
+	sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+	if (sk < 0)
+		test_error("socket()");
+
+	if (bind_addr && client_bind(sk, *bind_addr))
+		test_error("bind()");
+
+	if (md5_addr && test_set_md5(sk, *md5_addr, md5_prefix, -1, md5_password))
+		test_error("setsockopt(TCP_MD5SIG_EXT)");
+
+	if (ao_addr && test_add_key(sk, ao_password, *ao_addr,
+				    ao_prefix, sndid, rcvid))
+		test_error("setsockopt(TCP_AO_ADD_KEY)");
+
+	synchronize_threads(); /* preparations done */
+
+	timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
+	ret = _test_connect_socket(sk, this_ip_dest, port, timeout);
+
+	if (ret < 0) {
+		if (fault(KEYREJECT) && ret == -EKEYREJECTED)
+			test_ok("%s: connect() was prevented", tst_name);
+		else if (ret == -ETIMEDOUT && fault(TIMEOUT))
+			test_ok("%s", tst_name);
+		else if (ret == -ECONNREFUSED &&
+				(fault(TIMEOUT) || fault(KEYREJECT)))
+			test_ok("%s: refused to connect", tst_name);
+		else
+			test_error("%s: connect() returned %d", tst_name, ret);
+		goto out;
+	}
+
+	if (fault(TIMEOUT) || fault(KEYREJECT))
+		test_fail("%s: connected", tst_name);
+	else
+		test_ok("%s: connected", tst_name);
+
+out:
+	synchronize_threads(); /* test_kill_sk() */
+	/* _test_connect_socket() cleans up on failure */
+	if (ret > 0)
+		test_kill_sk(sk);
+}
+
+#define PREINSTALL_MD5_FIRST	BIT(0)
+#define PREINSTALL_AO		BIT(1)
+#define POSTINSTALL_AO		BIT(2)
+#define PREINSTALL_MD5		BIT(3)
+#define POSTINSTALL_MD5		BIT(4)
+
+static int try_add_key_vrf(int sk, union tcp_addr in_addr, uint8_t prefix,
+			   int vrf, uint8_t sndid, uint8_t rcvid,
+			   bool set_ao_required)
+{
+	uint8_t keyflags = 0;
+
+	if (vrf >= 0)
+		keyflags |= TCP_AO_KEYF_IFINDEX;
+	else
+		vrf = 0;
+	if (set_ao_required) {
+		int err = test_set_ao_flags(sk, true, 0);
+
+		if (err)
+			return err;
+	}
+	return test_add_key_vrf(sk, ao_password, keyflags, in_addr, prefix,
+				(uint8_t)vrf, sndid, rcvid);
+}
+
+static bool test_continue(const char *tst_name, int err,
+			  fault_t inj, bool added_ao)
+{
+	bool expected_to_fail;
+
+	expected_to_fail = fault(PREINSTALL_AO) && added_ao;
+	expected_to_fail |= fault(PREINSTALL_MD5) && !added_ao;
+
+	if (!err) {
+		if (!expected_to_fail)
+			return true;
+		test_fail("%s: setsockopt()s were expected to fail", tst_name);
+		return false;
+	}
+	if (err != -EKEYREJECTED || !expected_to_fail) {
+		test_error("%s: setsockopt(%s) = %d", tst_name,
+			   added_ao ? "TCP_AO_ADD_KEY" : "TCP_MD5SIG_EXT", err);
+		return false;
+	}
+	test_ok("%s: prefailed as expected: %m", tst_name);
+	return false;
+}
+
+static int open_add(const char *tst_name, unsigned int port,
+		    unsigned int strategy,
+		    union tcp_addr md5_addr, uint8_t md5_prefix, int md5_vrf,
+		    union tcp_addr ao_addr, uint8_t ao_prefix,
+		    int ao_vrf, bool set_ao_required,
+		    uint8_t sndid, uint8_t rcvid,
+		    fault_t inj)
+{
+	int sk;
+
+	sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+	if (sk < 0)
+		test_error("socket()");
+
+	if (client_bind(sk, this_ip_addr))
+		test_error("bind()");
+
+	if (strategy & PREINSTALL_MD5_FIRST) {
+		if (test_set_md5(sk, md5_addr, md5_prefix, md5_vrf, md5_password))
+			test_error("setsockopt(TCP_MD5SIG_EXT)");
+	}
+
+	if (strategy & PREINSTALL_AO) {
+		int err = try_add_key_vrf(sk, ao_addr, ao_prefix, ao_vrf,
+					  sndid, rcvid, set_ao_required);
+
+		if (!test_continue(tst_name, err, inj, true)) {
+			close(sk);
+			return -1;
+		}
+	}
+
+	if (strategy & PREINSTALL_MD5) {
+		errno = 0;
+		test_set_md5(sk, md5_addr, md5_prefix, md5_vrf, md5_password);
+		if (!test_continue(tst_name, -errno, inj, false)) {
+			close(sk);
+			return -1;
+		}
+	}
+
+	return sk;
+}
+
+static void try_to_preadd(const char *tst_name, unsigned int port,
+			  unsigned int strategy,
+			  union tcp_addr md5_addr, uint8_t md5_prefix,
+			  int md5_vrf,
+			  union tcp_addr ao_addr, uint8_t ao_prefix,
+			  int ao_vrf, bool set_ao_required,
+			  uint8_t sndid, uint8_t rcvid,
+			  int needs_tcp_md5, int needs_vrf, fault_t inj)
+{
+	int sk;
+
+	if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5))
+		return;
+	if (needs_vrf && should_skip_test(tst_name, KCONFIG_NET_VRF))
+		return;
+
+	sk = open_add(tst_name, port, strategy, md5_addr, md5_prefix, md5_vrf,
+		      ao_addr, ao_prefix, ao_vrf, set_ao_required,
+		      sndid, rcvid, inj);
+	if (sk < 0)
+		return;
+
+	test_ok("%s", tst_name);
+	close(sk);
+}
+
+static void try_to_add(const char *tst_name, unsigned int port,
+		       unsigned int strategy,
+		       union tcp_addr md5_addr, uint8_t md5_prefix,
+		       int md5_vrf,
+		       union tcp_addr ao_addr, uint8_t ao_prefix,
+		       int ao_vrf, uint8_t sndid, uint8_t rcvid,
+		       int needs_tcp_md5, fault_t inj)
+{
+	time_t timeout;
+	int sk, ret;
+
+	if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5))
+		return;
+
+	sk = open_add(tst_name, port, strategy, md5_addr, md5_prefix, md5_vrf,
+		      ao_addr, ao_prefix, ao_vrf, 0, sndid, rcvid, inj);
+	if (sk < 0)
+		return;
+
+	synchronize_threads(); /* preparations done */
+
+	timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
+	ret = _test_connect_socket(sk, this_ip_dest, port, timeout);
+
+	if (ret <= 0) {
+		test_error("%s: connect() returned %d", tst_name, ret);
+		goto out;
+	}
+
+	if (strategy & POSTINSTALL_MD5) {
+		if (test_set_md5(sk, md5_addr, md5_prefix, md5_vrf, md5_password)) {
+			if (fault(POSTINSTALL)) {
+				test_ok("%s: postfailed as expected", tst_name);
+				goto out;
+			} else {
+				test_error("setsockopt(TCP_MD5SIG_EXT)");
+			}
+		} else if (fault(POSTINSTALL)) {
+			test_fail("%s: post setsockopt() was expected to fail", tst_name);
+			goto out;
+		}
+	}
+
+	if (strategy & POSTINSTALL_AO) {
+		if (try_add_key_vrf(sk, ao_addr, ao_prefix, ao_vrf,
+				   sndid, rcvid, 0)) {
+			if (fault(POSTINSTALL)) {
+				test_ok("%s: postfailed as expected", tst_name);
+				goto out;
+			} else {
+				test_error("setsockopt(TCP_AO_ADD_KEY)");
+			}
+		} else if (fault(POSTINSTALL)) {
+			test_fail("%s: post setsockopt() was expected to fail", tst_name);
+			goto out;
+		}
+	}
+
+out:
+	synchronize_threads(); /* test_kill_sk() */
+	/* _test_connect_socket() cleans up on failure */
+	if (ret > 0)
+		test_kill_sk(sk);
+}
+
+static void client_add_ip(union tcp_addr *client, const char *ip)
+{
+	int err, family = TEST_FAMILY;
+
+	if (inet_pton(family, ip, client) != 1)
+		test_error("Can't convert ip address %s", ip);
+
+	err = ip_addr_add(veth_name, family, *client, TEST_PREFIX);
+	if (err)
+		test_error("Failed to add ip address: %d", err);
+}
+
+static void client_add_ips(void)
+{
+	client_add_ip(&client2, __TEST_CLIENT_IP(2));
+	client_add_ip(&client3, __TEST_CLIENT_IP(3));
+	synchronize_threads(); /* server_add_routes() */
+}
+
+static void client_add_fail_tests(unsigned int *port)
+{
+	try_to_add("TCP-AO established: add TCP-MD5 key",
+		   (*port)++, POSTINSTALL_MD5 | PREINSTALL_AO,
+		   this_ip_dest, TEST_PREFIX, -1, this_ip_dest, TEST_PREFIX, 0,
+		   100, 100, 1, FAULT_POSTINSTALL);
+	try_to_add("TCP-MD5 established: add TCP-AO key",
+		   (*port)++, PREINSTALL_MD5 | POSTINSTALL_AO,
+		   this_ip_dest, TEST_PREFIX, -1, this_ip_dest, TEST_PREFIX, 0,
+		   100, 100, 1, FAULT_POSTINSTALL);
+	try_to_add("non-signed established: add TCP-AO key",
+		   (*port)++, POSTINSTALL_AO,
+		   this_ip_dest, TEST_PREFIX, -1, this_ip_dest, TEST_PREFIX, 0,
+		   100, 100, 0, FAULT_POSTINSTALL);
+
+	try_to_add("TCP-AO key intersects with existing TCP-MD5 key",
+		   (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO,
+		   this_ip_addr, TEST_PREFIX, -1, this_ip_addr, TEST_PREFIX, -1,
+		   100, 100, 1, FAULT_PREINSTALL_AO);
+	try_to_add("TCP-MD5 key intersects with existing TCP-AO key",
+		   (*port)++, PREINSTALL_MD5 | PREINSTALL_AO,
+		   this_ip_addr, TEST_PREFIX, -1, this_ip_addr, TEST_PREFIX, -1,
+		   100, 100, 1, FAULT_PREINSTALL_MD5);
+
+	try_to_preadd("TCP-MD5 key + TCP-AO required",
+		      (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO,
+		      this_ip_addr, TEST_PREFIX, -1,
+		      this_ip_addr, TEST_PREFIX, -1, true,
+		      100, 100, 1, 0, FAULT_PREINSTALL_AO);
+	try_to_preadd("TCP-AO required on socket + TCP-MD5 key",
+		      (*port)++, PREINSTALL_MD5 | PREINSTALL_AO,
+		      this_ip_addr, TEST_PREFIX, -1,
+		      this_ip_addr, TEST_PREFIX, -1, true,
+		      100, 100, 1, 0, FAULT_PREINSTALL_MD5);
+}
+
+static void client_vrf_tests(unsigned int *port)
+{
+	setup_vrfs();
+
+	/* The following restrictions for setsockopt()s are expected:
+	 *
+	 * |--------------|-----------------|-------------|-------------|
+	 * |              | MD5 key without |   MD5 key   |   MD5 key   |
+	 * |              |     l3index     |  l3index=0  |  l3index=N  |
+	 * |--------------|-----------------|-------------|-------------|
+	 * |  TCP-AO key  |                 |             |             |
+	 * |  without     |     reject      |    reject   |    reject   |
+	 * |  l3index     |                 |             |             |
+	 * |--------------|-----------------|-------------|-------------|
+	 * |  TCP-AO key  |                 |             |             |
+	 * |  l3index=0   |     reject      |    reject   |    allow    |
+	 * |--------------|-----------------|-------------|-------------|
+	 * |  TCP-AO key  |                 |             |             |
+	 * |  l3index=N   |     reject      |    allow    |    reject   |
+	 * |--------------|-----------------|-------------|-------------|
+	 */
+	try_to_preadd("VRF: TCP-AO key (no l3index) + TCP-MD5 key (no l3index)",
+		      (*port)++, PREINSTALL_MD5 | PREINSTALL_AO,
+		      this_ip_addr, TEST_PREFIX, -1,
+		      this_ip_addr, TEST_PREFIX, -1, 0, 100, 100,
+		      1, 1, FAULT_PREINSTALL_MD5);
+	try_to_preadd("VRF: TCP-MD5 key (no l3index) + TCP-AO key (no l3index)",
+		      (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO,
+		      this_ip_addr, TEST_PREFIX, -1,
+		      this_ip_addr, TEST_PREFIX, -1, 0, 100, 100,
+		      1, 1, FAULT_PREINSTALL_AO);
+	try_to_preadd("VRF: TCP-AO key (no l3index) + TCP-MD5 key (l3index=0)",
+		      (*port)++, PREINSTALL_MD5 | PREINSTALL_AO,
+		      this_ip_addr, TEST_PREFIX, 0,
+		      this_ip_addr, TEST_PREFIX, -1, 0, 100, 100,
+		      1, 1, FAULT_PREINSTALL_MD5);
+	try_to_preadd("VRF: TCP-MD5 key (l3index=0) + TCP-AO key (no l3index)",
+		      (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO,
+		      this_ip_addr, TEST_PREFIX, 0,
+		      this_ip_addr, TEST_PREFIX, -1, 0, 100, 100,
+		      1, 1, FAULT_PREINSTALL_AO);
+	try_to_preadd("VRF: TCP-AO key (no l3index) + TCP-MD5 key (l3index=N)",
+		      (*port)++, PREINSTALL_MD5 | PREINSTALL_AO,
+		      this_ip_addr, TEST_PREFIX, test_vrf_ifindex,
+		      this_ip_addr, TEST_PREFIX, -1, 0, 100, 100,
+		      1, 1, FAULT_PREINSTALL_MD5);
+	try_to_preadd("VRF: TCP-MD5 key (l3index=N) + TCP-AO key (no l3index)",
+		      (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO,
+		      this_ip_addr, TEST_PREFIX, test_vrf_ifindex,
+		      this_ip_addr, TEST_PREFIX, -1, 0, 100, 100,
+		      1, 1, FAULT_PREINSTALL_AO);
+
+	try_to_preadd("VRF: TCP-AO key (l3index=0) + TCP-MD5 key (no l3index)",
+		      (*port)++, PREINSTALL_MD5 | PREINSTALL_AO,
+		      this_ip_addr, TEST_PREFIX, -1,
+		      this_ip_addr, TEST_PREFIX, 0, 0, 100, 100,
+		      1, 1, FAULT_PREINSTALL_MD5);
+	try_to_preadd("VRF: TCP-MD5 key (no l3index) + TCP-AO key (l3index=0)",
+		      (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO,
+		      this_ip_addr, TEST_PREFIX, -1,
+		      this_ip_addr, TEST_PREFIX, 0, 0, 100, 100,
+		      1, 1, FAULT_PREINSTALL_AO);
+	try_to_preadd("VRF: TCP-AO key (l3index=0) + TCP-MD5 key (l3index=0)",
+		      (*port)++, PREINSTALL_MD5 | PREINSTALL_AO,
+		      this_ip_addr, TEST_PREFIX, 0,
+		      this_ip_addr, TEST_PREFIX, 0, 0, 100, 100,
+		      1, 1, FAULT_PREINSTALL_MD5);
+	try_to_preadd("VRF: TCP-MD5 key (l3index=0) + TCP-AO key (l3index=0)",
+		      (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO,
+		      this_ip_addr, TEST_PREFIX, 0,
+		      this_ip_addr, TEST_PREFIX, 0, 0, 100, 100,
+		      1, 1, FAULT_PREINSTALL_AO);
+	try_to_preadd("VRF: TCP-AO key (l3index=0) + TCP-MD5 key (l3index=N)",
+		      (*port)++, PREINSTALL_MD5 | PREINSTALL_AO,
+		      this_ip_addr, TEST_PREFIX, test_vrf_ifindex,
+		      this_ip_addr, TEST_PREFIX, 0, 0, 100, 100,
+		      1, 1, 0);
+	try_to_preadd("VRF: TCP-MD5 key (l3index=N) + TCP-AO key (l3index=0)",
+		      (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO,
+		      this_ip_addr, TEST_PREFIX, test_vrf_ifindex,
+		      this_ip_addr, TEST_PREFIX, 0, 0, 100, 100,
+		      1, 1, 0);
+
+	try_to_preadd("VRF: TCP-AO key (l3index=N) + TCP-MD5 key (no l3index)",
+		      (*port)++, PREINSTALL_MD5 | PREINSTALL_AO,
+		      this_ip_addr, TEST_PREFIX, test_vrf_ifindex,
+		      this_ip_addr, TEST_PREFIX, -1, 0, 100, 100,
+		      1, 1, FAULT_PREINSTALL_MD5);
+	try_to_preadd("VRF: TCP-MD5 key (no l3index) + TCP-AO key (l3index=N)",
+		      (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO,
+		      this_ip_addr, TEST_PREFIX, -1,
+		      this_ip_addr, TEST_PREFIX, test_vrf_ifindex, 0, 100, 100,
+		      1, 1, FAULT_PREINSTALL_AO);
+	try_to_preadd("VRF: TCP-AO key (l3index=N) + TCP-MD5 key (l3index=0)",
+		      (*port)++, PREINSTALL_MD5 | PREINSTALL_AO,
+		      this_ip_addr, TEST_PREFIX, 0,
+		      this_ip_addr, TEST_PREFIX, test_vrf_ifindex, 0, 100, 100,
+		      1, 1, 0);
+	try_to_preadd("VRF: TCP-MD5 key (l3index=0) + TCP-AO key (l3index=N)",
+		      (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO,
+		      this_ip_addr, TEST_PREFIX, 0,
+		      this_ip_addr, TEST_PREFIX, test_vrf_ifindex, 0, 100, 100,
+		      1, 1, 0);
+	try_to_preadd("VRF: TCP-AO key (l3index=N) + TCP-MD5 key (l3index=N)",
+		      (*port)++, PREINSTALL_MD5 | PREINSTALL_AO,
+		      this_ip_addr, TEST_PREFIX, test_vrf_ifindex,
+		      this_ip_addr, TEST_PREFIX, test_vrf_ifindex, 0, 100, 100,
+		      1, 1, FAULT_PREINSTALL_MD5);
+	try_to_preadd("VRF: TCP-MD5 key (l3index=N) + TCP-AO key (l3index=N)",
+		      (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO,
+		      this_ip_addr, TEST_PREFIX, test_vrf_ifindex,
+		      this_ip_addr, TEST_PREFIX, test_vrf_ifindex, 0, 100, 100,
+		      1, 1, FAULT_PREINSTALL_AO);
+}
+
+static void *client_fn(void *arg)
+{
+	unsigned int port = test_server_port;
+	union tcp_addr addr_any = {};
+
+	client_add_ips();
+
+	try_connect("AO server (INADDR_ANY): AO client", port++, NULL, 0,
+		    &addr_any, 0, 100, 100, 0, 0, 0, &this_ip_addr);
+	try_connect("AO server (INADDR_ANY): MD5 client", port++, &addr_any, 0,
+		    NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr);
+	try_connect("AO server (INADDR_ANY): unsigned client", port++, NULL, 0,
+		    NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 0, &this_ip_addr);
+	try_connect("AO server (AO_REQUIRED): AO client", port++, NULL, 0,
+		    &addr_any, 0, 100, 100, 0, 0, 0, &this_ip_addr);
+	try_connect("AO server (AO_REQUIRED): unsigned client", port++, NULL, 0,
+		    NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 0, &client2);
+
+	try_connect("MD5 server (INADDR_ANY): AO client", port++, NULL, 0,
+		   &addr_any, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr);
+	try_connect("MD5 server (INADDR_ANY): MD5 client", port++, &addr_any, 0,
+		   NULL, 0, 100, 100, 0, 0, 1, &this_ip_addr);
+	try_connect("MD5 server (INADDR_ANY): no sign client", port++, NULL, 0,
+		   NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr);
+
+	try_connect("no sign server: AO client", port++, NULL, 0,
+		   &addr_any, 0, 100, 100, 0, FAULT_TIMEOUT, 0, &this_ip_addr);
+	try_connect("no sign server: MD5 client", port++, &addr_any, 0,
+		   NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr);
+	try_connect("no sign server: no sign client", port++, NULL, 0,
+		   NULL, 0, 100, 100, 0, 0, 0, &this_ip_addr);
+
+	try_connect("AO+MD5 server: AO client (matching)", port++, NULL, 0,
+		   &addr_any, 0, 100, 100, 0, 0, 1, &client2);
+	try_connect("AO+MD5 server: AO client (misconfig, matching MD5)",
+		   port++, NULL, 0, &addr_any, 0, 100, 100, 0,
+		   FAULT_TIMEOUT, 1, &this_ip_addr);
+	try_connect("AO+MD5 server: AO client (misconfig, non-matching)",
+		   port++, NULL, 0, &addr_any, 0, 100, 100, 0,
+		   FAULT_TIMEOUT, 1, &client3);
+	try_connect("AO+MD5 server: MD5 client (matching)", port++, &addr_any, 0,
+		   NULL, 0, 100, 100, 0, 0, 1, &this_ip_addr);
+	try_connect("AO+MD5 server: MD5 client (misconfig, matching AO)",
+		   port++, &addr_any, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT,
+		   1, &client2);
+	try_connect("AO+MD5 server: MD5 client (misconfig, non-matching)",
+		   port++, &addr_any, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT,
+		   1, &client3);
+	try_connect("AO+MD5 server: no sign client (unmatched)",
+		   port++, NULL, 0, NULL, 0, 100, 100, 0, 0, 1, &client3);
+	try_connect("AO+MD5 server: no sign client (misconfig, matching AO)",
+		   port++, NULL, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT,
+		   1, &client2);
+	try_connect("AO+MD5 server: no sign client (misconfig, matching MD5)",
+		   port++, NULL, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT,
+		   1, &this_ip_addr);
+
+	try_connect("AO+MD5 server: client with both [TCP-MD5] and TCP-AO keys",
+		   port++, &this_ip_addr, TEST_PREFIX,
+		   &client2, TEST_PREFIX, 100, 100, 0, FAULT_KEYREJECT,
+		   1, &this_ip_addr);
+	try_connect("AO+MD5 server: client with both TCP-MD5 and [TCP-AO] keys",
+		   port++, &this_ip_addr, TEST_PREFIX,
+		   &client2, TEST_PREFIX, 100, 100, 0, FAULT_KEYREJECT,
+		   1, &client2);
+
+	client_add_fail_tests(&port);
+	client_vrf_tests(&port);
+
+	return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+	test_init(72, server_fn, client_fn);
+	return 0;
+}
diff --git a/tools/testing/selftests/net/tcp_fastopen_backup_key.c b/tools/testing/selftests/net/tcp_fastopen_backup_key.c
index 9c55ec44fc43..c1cb0c75156a 100644
--- a/tools/testing/selftests/net/tcp_fastopen_backup_key.c
+++ b/tools/testing/selftests/net/tcp_fastopen_backup_key.c
@@ -26,6 +26,8 @@
 #include <fcntl.h>
 #include <time.h>
 
+#include "../kselftest.h"
+
 #ifndef TCP_FASTOPEN_KEY
 #define TCP_FASTOPEN_KEY 33
 #endif
@@ -34,10 +36,6 @@
 #define PROC_FASTOPEN_KEY "/proc/sys/net/ipv4/tcp_fastopen_key"
 #define KEY_LENGTH 16
 
-#ifndef ARRAY_SIZE
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
-#endif
-
 static bool do_ipv6;
 static bool do_sockopt;
 static bool do_rotate;
diff --git a/tools/testing/selftests/net/tcp_mmap.c b/tools/testing/selftests/net/tcp_mmap.c
index 00f837c9bc6c..4fcce5150850 100644
--- a/tools/testing/selftests/net/tcp_mmap.c
+++ b/tools/testing/selftests/net/tcp_mmap.c
@@ -66,11 +66,16 @@
 #include <poll.h>
 #include <linux/tcp.h>
 #include <assert.h>
+#include <openssl/pem.h>
 
 #ifndef MSG_ZEROCOPY
 #define MSG_ZEROCOPY    0x4000000
 #endif
 
+#ifndef min
+#define min(a, b)  ((a) < (b) ? (a) : (b))
+#endif
+
 #define FILE_SZ (1ULL << 35)
 static int cfg_family = AF_INET6;
 static socklen_t cfg_alen = sizeof(struct sockaddr_in6);
@@ -81,12 +86,14 @@ static int sndbuf; /* Default: autotuning.  Can be set with -w <integer> option
 static int zflg; /* zero copy option. (MSG_ZEROCOPY for sender, mmap() for receiver */
 static int xflg; /* hash received data (simple xor) (-h option) */
 static int keepflag; /* -k option: receiver shall keep all received file in memory (no munmap() calls) */
+static int integrity; /* -i option: sender and receiver compute sha256 over the data.*/
 
 static size_t chunk_size  = 512*1024;
 
 static size_t map_align;
 
 unsigned long htotal;
+unsigned int digest_len;
 
 static inline void prefetch(const void *x)
 {
@@ -137,7 +144,8 @@ static void *mmap_large_buffer(size_t need, size_t *allocated)
 	if (buffer == (void *)-1) {
 		sz = need;
 		buffer = mmap(NULL, sz, PROT_READ | PROT_WRITE,
-			      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+			      MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE,
+			      -1, 0);
 		if (buffer != (void *)-1)
 			fprintf(stderr, "MAP_HUGETLB attempt failed, look at /sys/kernel/mm/hugepages for optimal performance\n");
 	}
@@ -145,14 +153,29 @@ static void *mmap_large_buffer(size_t need, size_t *allocated)
 	return buffer;
 }
 
+static uint32_t tcp_info_get_rcv_mss(int fd)
+{
+	socklen_t sz = sizeof(struct tcp_info);
+	struct tcp_info info;
+
+	if (getsockopt(fd, IPPROTO_TCP, TCP_INFO, &info, &sz)) {
+		fprintf(stderr, "Error fetching TCP_INFO\n");
+		return 0;
+	}
+
+	return info.tcpi_rcv_mss;
+}
+
 void *child_thread(void *arg)
 {
+	unsigned char digest[SHA256_DIGEST_LENGTH];
 	unsigned long total_mmap = 0, total = 0;
 	struct tcp_zerocopy_receive zc;
+	unsigned char *buffer = NULL;
 	unsigned long delta_usec;
+	EVP_MD_CTX *ctx = NULL;
 	int flags = MAP_SHARED;
 	struct timeval t0, t1;
-	char *buffer = NULL;
 	void *raddr = NULL;
 	void *addr = NULL;
 	double throughput;
@@ -179,6 +202,14 @@ void *child_thread(void *arg)
 			addr = ALIGN_PTR_UP(raddr, map_align);
 		}
 	}
+	if (integrity) {
+		ctx = EVP_MD_CTX_new();
+		if (!ctx) {
+			perror("cannot enable SHA computing");
+			goto error;
+		}
+		EVP_DigestInit_ex(ctx, EVP_sha256(), NULL);
+	}
 	while (1) {
 		struct pollfd pfd = { .fd = fd, .events = POLLIN, };
 		int sub;
@@ -190,7 +221,7 @@ void *child_thread(void *arg)
 
 			memset(&zc, 0, sizeof(zc));
 			zc.address = (__u64)((unsigned long)addr);
-			zc.length = chunk_size;
+			zc.length = min(chunk_size, FILE_SZ - total);
 
 			res = getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE,
 					 &zc, &zc_len);
@@ -199,6 +230,8 @@ void *child_thread(void *arg)
 
 			if (zc.length) {
 				assert(zc.length <= chunk_size);
+				if (integrity)
+					EVP_DigestUpdate(ctx, addr, zc.length);
 				total_mmap += zc.length;
 				if (xflg)
 					hash_zone(addr, zc.length);
@@ -210,22 +243,30 @@ void *child_thread(void *arg)
 			}
 			if (zc.recv_skip_hint) {
 				assert(zc.recv_skip_hint <= chunk_size);
-				lu = read(fd, buffer, zc.recv_skip_hint);
+				lu = read(fd, buffer, min(zc.recv_skip_hint,
+							  FILE_SZ - total));
 				if (lu > 0) {
+					if (integrity)
+						EVP_DigestUpdate(ctx, buffer, lu);
 					if (xflg)
 						hash_zone(buffer, lu);
 					total += lu;
 				}
+				if (lu == 0)
+					goto end;
 			}
 			continue;
 		}
 		sub = 0;
 		while (sub < chunk_size) {
-			lu = read(fd, buffer + sub, chunk_size - sub);
+			lu = read(fd, buffer + sub, min(chunk_size - sub,
+							FILE_SZ - total));
 			if (lu == 0)
 				goto end;
 			if (lu < 0)
 				break;
+			if (integrity)
+				EVP_DigestUpdate(ctx, buffer + sub, lu);
 			if (xflg)
 				hash_zone(buffer + sub, lu);
 			total += lu;
@@ -236,6 +277,20 @@ end:
 	gettimeofday(&t1, NULL);
 	delta_usec = (t1.tv_sec - t0.tv_sec) * 1000000 + t1.tv_usec - t0.tv_usec;
 
+	if (integrity) {
+		fcntl(fd, F_SETFL, 0);
+		EVP_DigestFinal_ex(ctx, digest, &digest_len);
+		lu = read(fd, buffer, SHA256_DIGEST_LENGTH);
+		if (lu != SHA256_DIGEST_LENGTH)
+			perror("Error: Cannot read SHA256\n");
+
+		if (memcmp(digest, buffer,
+			   SHA256_DIGEST_LENGTH))
+			fprintf(stderr, "Error: SHA256 of the data is not right\n");
+		else
+			printf("\nSHA256 is correct\n");
+	}
+
 	throughput = 0;
 	if (delta_usec)
 		throughput = total * 8.0 / (double)delta_usec / 1000.0;
@@ -246,7 +301,7 @@ end:
 		total_usec = 1000000*ru.ru_utime.tv_sec + ru.ru_utime.tv_usec +
 			     1000000*ru.ru_stime.tv_sec + ru.ru_stime.tv_usec;
 		printf("received %lg MB (%lg %% mmap'ed) in %lg s, %lg Gbit\n"
-		       "  cpu usage user:%lg sys:%lg, %lg usec per MB, %lu c-switches\n",
+		       "  cpu usage user:%lg sys:%lg, %lg usec per MB, %lu c-switches, rcv_mss %u\n",
 				total / (1024.0 * 1024.0),
 				100.0*total_mmap/total,
 				(double)delta_usec / 1000000.0,
@@ -254,7 +309,8 @@ end:
 				(double)ru.ru_utime.tv_sec + (double)ru.ru_utime.tv_usec / 1000000.0,
 				(double)ru.ru_stime.tv_sec + (double)ru.ru_stime.tv_usec / 1000000.0,
 				(double)total_usec/mb,
-				ru.ru_nvcsw);
+				ru.ru_nvcsw,
+				tcp_info_get_rcv_mss(fd));
 	}
 error:
 	munmap(buffer, buffer_sz);
@@ -367,19 +423,38 @@ static unsigned long default_huge_page_size(void)
 	return hps;
 }
 
+static void randomize(void *target, size_t count)
+{
+	static int urandom = -1;
+	ssize_t got;
+
+	urandom = open("/dev/urandom", O_RDONLY);
+	if (urandom < 0) {
+		perror("open /dev/urandom");
+		exit(1);
+	}
+	got = read(urandom, target, count);
+	if (got != count) {
+		perror("read /dev/urandom");
+		exit(1);
+	}
+}
+
 int main(int argc, char *argv[])
 {
+	unsigned char digest[SHA256_DIGEST_LENGTH];
 	struct sockaddr_storage listenaddr, addr;
 	unsigned int max_pacing_rate = 0;
+	EVP_MD_CTX *ctx = NULL;
+	unsigned char *buffer;
 	uint64_t total = 0;
 	char *host = NULL;
 	int fd, c, on = 1;
 	size_t buffer_sz;
-	char *buffer;
 	int sflg = 0;
 	int mss = 0;
 
-	while ((c = getopt(argc, argv, "46p:svr:w:H:zxkP:M:C:a:")) != -1) {
+	while ((c = getopt(argc, argv, "46p:svr:w:H:zxkP:M:C:a:i")) != -1) {
 		switch (c) {
 		case '4':
 			cfg_family = PF_INET;
@@ -425,6 +500,9 @@ int main(int argc, char *argv[])
 		case 'a':
 			map_align = atol(optarg);
 			break;
+		case 'i':
+			integrity = 1;
+			break;
 		default:
 			exit(1);
 		}
@@ -467,7 +545,7 @@ int main(int argc, char *argv[])
 	}
 
 	buffer = mmap_large_buffer(chunk_size, &buffer_sz);
-	if (buffer == (char *)-1) {
+	if (buffer == (unsigned char *)-1) {
 		perror("mmap");
 		exit(1);
 	}
@@ -500,17 +578,34 @@ int main(int argc, char *argv[])
 		perror("setsockopt SO_ZEROCOPY, (-z option disabled)");
 		zflg = 0;
 	}
+	if (integrity) {
+		randomize(buffer, buffer_sz);
+		ctx = EVP_MD_CTX_new();
+		if (!ctx) {
+			perror("cannot enable SHA computing");
+			exit(1);
+		}
+		EVP_DigestInit_ex(ctx, EVP_sha256(), NULL);
+	}
 	while (total < FILE_SZ) {
+		size_t offset = total % chunk_size;
 		int64_t wr = FILE_SZ - total;
 
-		if (wr > chunk_size)
-			wr = chunk_size;
-		/* Note : we just want to fill the pipe with 0 bytes */
-		wr = send(fd, buffer, (size_t)wr, zflg ? MSG_ZEROCOPY : 0);
+		if (wr > chunk_size - offset)
+			wr = chunk_size - offset;
+		/* Note : we just want to fill the pipe with random bytes */
+		wr = send(fd, buffer + offset,
+			  (size_t)wr, zflg ? MSG_ZEROCOPY : 0);
 		if (wr <= 0)
 			break;
+		if (integrity)
+			EVP_DigestUpdate(ctx, buffer + offset, wr);
 		total += wr;
 	}
+	if (integrity && total == FILE_SZ) {
+		EVP_DigestFinal_ex(ctx, digest, &digest_len);
+		send(fd, digest, (size_t)SHA256_DIGEST_LENGTH, 0);
+	}
 	close(fd);
 	munmap(buffer, buffer_sz);
 	return 0;
diff --git a/tools/testing/selftests/net/test_bridge_backup_port.sh b/tools/testing/selftests/net/test_bridge_backup_port.sh
new file mode 100755
index 000000000000..1b3f89e2b86e
--- /dev/null
+++ b/tools/testing/selftests/net/test_bridge_backup_port.sh
@@ -0,0 +1,775 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test is for checking bridge backup port and backup nexthop ID
+# functionality. The topology consists of two bridge (VTEPs) connected using
+# VXLAN. The test checks that when the switch port (swp1) is down, traffic is
+# redirected to the VXLAN port (vx0). When a backup nexthop ID is configured,
+# the test checks that traffic is redirected with the correct nexthop
+# information.
+#
+# +------------------------------------+ +------------------------------------+
+# |    + swp1                   + vx0  | |    + swp1                   + vx0  |
+# |    |                        |      | |    |                        |      |
+# |    |           br0          |      | |    |                        |      |
+# |    +------------+-----------+      | |    +------------+-----------+      |
+# |                 |                  | |                 |                  |
+# |                 |                  | |                 |                  |
+# |                 +                  | |                 +                  |
+# |                br0                 | |                br0                 |
+# |                 +                  | |                 +                  |
+# |                 |                  | |                 |                  |
+# |                 |                  | |                 |                  |
+# |                 +                  | |                 +                  |
+# |              br0.10                | |              br0.10                |
+# |           192.0.2.65/28            | |            192.0.2.66/28           |
+# |                                    | |                                    |
+# |                                    | |                                    |
+# |                 192.0.2.33         | |                 192.0.2.34         |
+# |                 + lo               | |                 + lo               |
+# |                                    | |                                    |
+# |                                    | |                                    |
+# |                   192.0.2.49/28    | |    192.0.2.50/28                   |
+# |                           veth0 +-------+ veth0                           |
+# |                                    | |                                    |
+# | sw1                                | | sw2                                |
+# +------------------------------------+ +------------------------------------+
+
+source lib.sh
+ret=0
+
+# All tests in this script. Can be overridden with -t option.
+TESTS="
+	backup_port
+	backup_nhid
+	backup_nhid_invalid
+	backup_nhid_ping
+	backup_nhid_torture
+"
+VERBOSE=0
+PAUSE_ON_FAIL=no
+PAUSE=no
+PING_TIMEOUT=5
+
+################################################################################
+# Utilities
+
+log_test()
+{
+	local rc=$1
+	local expected=$2
+	local msg="$3"
+
+	if [ ${rc} -eq ${expected} ]; then
+		printf "TEST: %-60s  [ OK ]\n" "${msg}"
+		nsuccess=$((nsuccess+1))
+	else
+		ret=1
+		nfail=$((nfail+1))
+		printf "TEST: %-60s  [FAIL]\n" "${msg}"
+		if [ "$VERBOSE" = "1" ]; then
+			echo "    rc=$rc, expected $expected"
+		fi
+
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+		echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	fi
+
+	if [ "${PAUSE}" = "yes" ]; then
+		echo
+		echo "hit enter to continue, 'q' to quit"
+		read a
+		[ "$a" = "q" ] && exit 1
+	fi
+
+	[ "$VERBOSE" = "1" ] && echo
+}
+
+run_cmd()
+{
+	local cmd="$1"
+	local out
+	local stderr="2>/dev/null"
+
+	if [ "$VERBOSE" = "1" ]; then
+		printf "COMMAND: $cmd\n"
+		stderr=
+	fi
+
+	out=$(eval $cmd $stderr)
+	rc=$?
+	if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+		echo "    $out"
+	fi
+
+	return $rc
+}
+
+tc_check_packets()
+{
+	local ns=$1; shift
+	local id=$1; shift
+	local handle=$1; shift
+	local count=$1; shift
+	local pkts
+
+	sleep 0.1
+	pkts=$(tc -n $ns -j -s filter show $id \
+		| jq ".[] | select(.options.handle == $handle) | \
+		.options.actions[0].stats.packets")
+	[[ $pkts == $count ]]
+}
+
+bridge_link_check()
+{
+	local ns=$1; shift
+	local dev=$1; shift
+	local state=$1; shift
+
+	bridge -n $ns -d -j link show dev $dev | \
+		jq -e ".[][\"state\"] == \"$state\"" &> /dev/null
+}
+
+################################################################################
+# Setup
+
+setup_topo_ns()
+{
+	local ns=$1; shift
+
+	ip netns exec $ns sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1
+	ip netns exec $ns sysctl -qw net.ipv6.conf.default.ignore_routes_with_linkdown=1
+	ip netns exec $ns sysctl -qw net.ipv6.conf.all.accept_dad=0
+	ip netns exec $ns sysctl -qw net.ipv6.conf.default.accept_dad=0
+}
+
+setup_topo()
+{
+	local ns
+
+	setup_ns sw1 sw2
+	for ns in $sw1 $sw2; do
+		setup_topo_ns $ns
+	done
+
+	ip link add name veth0 type veth peer name veth1
+	ip link set dev veth0 netns $sw1 name veth0
+	ip link set dev veth1 netns $sw2 name veth0
+}
+
+setup_sw_common()
+{
+	local ns=$1; shift
+	local local_addr=$1; shift
+	local remote_addr=$1; shift
+	local veth_addr=$1; shift
+	local gw_addr=$1; shift
+	local br_addr=$1; shift
+
+	ip -n $ns address add $local_addr/32 dev lo
+
+	ip -n $ns link set dev veth0 up
+	ip -n $ns address add $veth_addr/28 dev veth0
+	ip -n $ns route add default via $gw_addr
+
+	ip -n $ns link add name br0 up type bridge vlan_filtering 1 \
+		vlan_default_pvid 0 mcast_snooping 0
+
+	ip -n $ns link add link br0 name br0.10 up type vlan id 10
+	bridge -n $ns vlan add vid 10 dev br0 self
+	ip -n $ns address add $br_addr/28 dev br0.10
+
+	ip -n $ns link add name swp1 up type dummy
+	ip -n $ns link set dev swp1 master br0
+	bridge -n $ns vlan add vid 10 dev swp1 untagged
+
+	ip -n $ns link add name vx0 up master br0 type vxlan \
+		local $local_addr dstport 4789 nolearning external
+	bridge -n $ns link set dev vx0 vlan_tunnel on learning off
+
+	bridge -n $ns vlan add vid 10 dev vx0
+	bridge -n $ns vlan add vid 10 dev vx0 tunnel_info id 10010
+}
+
+setup_sw1()
+{
+	local ns=$sw1
+	local local_addr=192.0.2.33
+	local remote_addr=192.0.2.34
+	local veth_addr=192.0.2.49
+	local gw_addr=192.0.2.50
+	local br_addr=192.0.2.65
+
+	setup_sw_common $ns $local_addr $remote_addr $veth_addr $gw_addr \
+		$br_addr
+}
+
+setup_sw2()
+{
+	local ns=$sw2
+	local local_addr=192.0.2.34
+	local remote_addr=192.0.2.33
+	local veth_addr=192.0.2.50
+	local gw_addr=192.0.2.49
+	local br_addr=192.0.2.66
+
+	setup_sw_common $ns $local_addr $remote_addr $veth_addr $gw_addr \
+		$br_addr
+}
+
+setup()
+{
+	set -e
+
+	setup_topo
+	setup_sw1
+	setup_sw2
+
+	sleep 5
+
+	set +e
+}
+
+cleanup()
+{
+	cleanup_ns $sw1 $sw2
+}
+
+################################################################################
+# Tests
+
+backup_port()
+{
+	local dmac=00:11:22:33:44:55
+	local smac=00:aa:bb:cc:dd:ee
+
+	echo
+	echo "Backup port"
+	echo "-----------"
+
+	run_cmd "tc -n $sw1 qdisc replace dev swp1 clsact"
+	run_cmd "tc -n $sw1 filter replace dev swp1 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass"
+
+	run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact"
+	run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass"
+
+	run_cmd "bridge -n $sw1 fdb replace $dmac dev swp1 master static vlan 10"
+
+	# Initial state - check that packets are forwarded out of swp1 when it
+	# has a carrier and not forwarded out of any port when it does not have
+	# a carrier.
+	run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+	tc_check_packets $sw1 "dev swp1 egress" 101 1
+	log_test $? 0 "Forwarding out of swp1"
+	tc_check_packets $sw1 "dev vx0 egress" 101 0
+	log_test $? 0 "No forwarding out of vx0"
+
+	run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+	busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
+	log_test $? 0 "swp1 carrier off"
+
+	run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+	tc_check_packets $sw1 "dev swp1 egress" 101 1
+	log_test $? 0 "No forwarding out of swp1"
+	tc_check_packets $sw1 "dev vx0 egress" 101 0
+	log_test $? 0 "No forwarding out of vx0"
+
+	run_cmd "ip -n $sw1 link set dev swp1 carrier on"
+	busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 forwarding
+	log_test $? 0 "swp1 carrier on"
+
+	# Configure vx0 as the backup port of swp1 and check that packets are
+	# forwarded out of swp1 when it has a carrier and out of vx0 when swp1
+	# does not have a carrier.
+	run_cmd "bridge -n $sw1 link set dev swp1 backup_port vx0"
+	run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_port vx0\""
+	log_test $? 0 "vx0 configured as backup port of swp1"
+
+	run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+	tc_check_packets $sw1 "dev swp1 egress" 101 2
+	log_test $? 0 "Forwarding out of swp1"
+	tc_check_packets $sw1 "dev vx0 egress" 101 0
+	log_test $? 0 "No forwarding out of vx0"
+
+	run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+	busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
+	log_test $? 0 "swp1 carrier off"
+
+	run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+	tc_check_packets $sw1 "dev swp1 egress" 101 2
+	log_test $? 0 "No forwarding out of swp1"
+	tc_check_packets $sw1 "dev vx0 egress" 101 1
+	log_test $? 0 "Forwarding out of vx0"
+
+	run_cmd "ip -n $sw1 link set dev swp1 carrier on"
+	busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 forwarding
+	log_test $? 0 "swp1 carrier on"
+
+	run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+	tc_check_packets $sw1 "dev swp1 egress" 101 3
+	log_test $? 0 "Forwarding out of swp1"
+	tc_check_packets $sw1 "dev vx0 egress" 101 1
+	log_test $? 0 "No forwarding out of vx0"
+
+	# Remove vx0 as the backup port of swp1 and check that packets are no
+	# longer forwarded out of vx0 when swp1 does not have a carrier.
+	run_cmd "bridge -n $sw1 link set dev swp1 nobackup_port"
+	run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_port vx0\""
+	log_test $? 1 "vx0 not configured as backup port of swp1"
+
+	run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+	tc_check_packets $sw1 "dev swp1 egress" 101 4
+	log_test $? 0 "Forwarding out of swp1"
+	tc_check_packets $sw1 "dev vx0 egress" 101 1
+	log_test $? 0 "No forwarding out of vx0"
+
+	run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+	busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
+	log_test $? 0 "swp1 carrier off"
+
+	run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+	tc_check_packets $sw1 "dev swp1 egress" 101 4
+	log_test $? 0 "No forwarding out of swp1"
+	tc_check_packets $sw1 "dev vx0 egress" 101 1
+	log_test $? 0 "No forwarding out of vx0"
+}
+
+backup_nhid()
+{
+	local dmac=00:11:22:33:44:55
+	local smac=00:aa:bb:cc:dd:ee
+
+	echo
+	echo "Backup nexthop ID"
+	echo "-----------------"
+
+	run_cmd "tc -n $sw1 qdisc replace dev swp1 clsact"
+	run_cmd "tc -n $sw1 filter replace dev swp1 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass"
+
+	run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact"
+	run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass"
+
+	run_cmd "ip -n $sw1 nexthop replace id 1 via 192.0.2.34 fdb"
+	run_cmd "ip -n $sw1 nexthop replace id 2 via 192.0.2.34 fdb"
+	run_cmd "ip -n $sw1 nexthop replace id 10 group 1/2 fdb"
+
+	run_cmd "bridge -n $sw1 fdb replace $dmac dev swp1 master static vlan 10"
+	run_cmd "bridge -n $sw1 fdb replace $dmac dev vx0 self static dst 192.0.2.36 src_vni 10010"
+
+	run_cmd "ip -n $sw2 address replace 192.0.2.36/32 dev lo"
+
+	# The first filter matches on packets forwarded using the backup
+	# nexthop ID and the second filter matches on packets forwarded using a
+	# regular VXLAN FDB entry.
+	run_cmd "tc -n $sw2 qdisc replace dev vx0 clsact"
+	run_cmd "tc -n $sw2 filter replace dev vx0 ingress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac enc_key_id 10010 enc_dst_ip 192.0.2.34 action pass"
+	run_cmd "tc -n $sw2 filter replace dev vx0 ingress pref 1 handle 102 proto ip flower src_mac $smac dst_mac $dmac enc_key_id 10010 enc_dst_ip 192.0.2.36 action pass"
+
+	# Configure vx0 as the backup port of swp1 and check that packets are
+	# forwarded out of swp1 when it has a carrier and out of vx0 when swp1
+	# does not have a carrier. When packets are forwarded out of vx0, check
+	# that they are forwarded by the VXLAN FDB entry.
+	run_cmd "bridge -n $sw1 link set dev swp1 backup_port vx0"
+	run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_port vx0\""
+	log_test $? 0 "vx0 configured as backup port of swp1"
+
+	run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+	tc_check_packets $sw1 "dev swp1 egress" 101 1
+	log_test $? 0 "Forwarding out of swp1"
+	tc_check_packets $sw1 "dev vx0 egress" 101 0
+	log_test $? 0 "No forwarding out of vx0"
+
+	run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+	busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
+	log_test $? 0 "swp1 carrier off"
+
+	run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+	tc_check_packets $sw1 "dev swp1 egress" 101 1
+	log_test $? 0 "No forwarding out of swp1"
+	tc_check_packets $sw1 "dev vx0 egress" 101 1
+	log_test $? 0 "Forwarding out of vx0"
+	tc_check_packets $sw2 "dev vx0 ingress" 101 0
+	log_test $? 0 "No forwarding using backup nexthop ID"
+	tc_check_packets $sw2 "dev vx0 ingress" 102 1
+	log_test $? 0 "Forwarding using VXLAN FDB entry"
+
+	run_cmd "ip -n $sw1 link set dev swp1 carrier on"
+	busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 forwarding
+	log_test $? 0 "swp1 carrier on"
+
+	# Configure nexthop ID 10 as the backup nexthop ID of swp1 and check
+	# that when packets are forwarded out of vx0, they are forwarded using
+	# the backup nexthop ID.
+	run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 10"
+	run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid 10\""
+	log_test $? 0 "nexthop ID 10 configured as backup nexthop ID of swp1"
+
+	run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+	tc_check_packets $sw1 "dev swp1 egress" 101 2
+	log_test $? 0 "Forwarding out of swp1"
+	tc_check_packets $sw1 "dev vx0 egress" 101 1
+	log_test $? 0 "No forwarding out of vx0"
+
+	run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+	busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
+	log_test $? 0 "swp1 carrier off"
+
+	run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+	tc_check_packets $sw1 "dev swp1 egress" 101 2
+	log_test $? 0 "No forwarding out of swp1"
+	tc_check_packets $sw1 "dev vx0 egress" 101 2
+	log_test $? 0 "Forwarding out of vx0"
+	tc_check_packets $sw2 "dev vx0 ingress" 101 1
+	log_test $? 0 "Forwarding using backup nexthop ID"
+	tc_check_packets $sw2 "dev vx0 ingress" 102 1
+	log_test $? 0 "No forwarding using VXLAN FDB entry"
+
+	run_cmd "ip -n $sw1 link set dev swp1 carrier on"
+	busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 forwarding
+	log_test $? 0 "swp1 carrier on"
+
+	run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+	tc_check_packets $sw1 "dev swp1 egress" 101 3
+	log_test $? 0 "Forwarding out of swp1"
+	tc_check_packets $sw1 "dev vx0 egress" 101 2
+	log_test $? 0 "No forwarding out of vx0"
+	tc_check_packets $sw2 "dev vx0 ingress" 101 1
+	log_test $? 0 "No forwarding using backup nexthop ID"
+	tc_check_packets $sw2 "dev vx0 ingress" 102 1
+	log_test $? 0 "No forwarding using VXLAN FDB entry"
+
+	# Reset the backup nexthop ID to 0 and check that packets are no longer
+	# forwarded using the backup nexthop ID when swp1 does not have a
+	# carrier and are instead forwarded by the VXLAN FDB.
+	run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 0"
+	run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid\""
+	log_test $? 1 "No backup nexthop ID configured for swp1"
+
+	run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+	tc_check_packets $sw1 "dev swp1 egress" 101 4
+	log_test $? 0 "Forwarding out of swp1"
+	tc_check_packets $sw1 "dev vx0 egress" 101 2
+	log_test $? 0 "No forwarding out of vx0"
+	tc_check_packets $sw2 "dev vx0 ingress" 101 1
+	log_test $? 0 "No forwarding using backup nexthop ID"
+	tc_check_packets $sw2 "dev vx0 ingress" 102 1
+	log_test $? 0 "No forwarding using VXLAN FDB entry"
+
+	run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+	busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
+	log_test $? 0 "swp1 carrier off"
+
+	run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+	tc_check_packets $sw1 "dev swp1 egress" 101 4
+	log_test $? 0 "No forwarding out of swp1"
+	tc_check_packets $sw1 "dev vx0 egress" 101 3
+	log_test $? 0 "Forwarding out of vx0"
+	tc_check_packets $sw2 "dev vx0 ingress" 101 1
+	log_test $? 0 "No forwarding using backup nexthop ID"
+	tc_check_packets $sw2 "dev vx0 ingress" 102 2
+	log_test $? 0 "Forwarding using VXLAN FDB entry"
+}
+
+backup_nhid_invalid()
+{
+	local dmac=00:11:22:33:44:55
+	local smac=00:aa:bb:cc:dd:ee
+	local tx_drop
+
+	echo
+	echo "Backup nexthop ID - invalid IDs"
+	echo "-------------------------------"
+
+	# Check that when traffic is redirected with an invalid nexthop ID, it
+	# is forwarded out of the VXLAN port, but dropped by the VXLAN driver
+	# and does not crash the host.
+
+	run_cmd "tc -n $sw1 qdisc replace dev swp1 clsact"
+	run_cmd "tc -n $sw1 filter replace dev swp1 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass"
+
+	run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact"
+	run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass"
+	# Drop all other Tx traffic to avoid changes to Tx drop counter.
+	run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 2 handle 102 proto all matchall action drop"
+
+	tx_drop=$(ip -n $sw1 -s -j link show dev vx0 | jq '.[]["stats64"]["tx"]["dropped"]')
+
+	run_cmd "ip -n $sw1 nexthop replace id 1 via 192.0.2.34 fdb"
+	run_cmd "ip -n $sw1 nexthop replace id 2 via 192.0.2.34 fdb"
+	run_cmd "ip -n $sw1 nexthop replace id 10 group 1/2 fdb"
+
+	run_cmd "bridge -n $sw1 fdb replace $dmac dev swp1 master static vlan 10"
+
+	run_cmd "tc -n $sw2 qdisc replace dev vx0 clsact"
+	run_cmd "tc -n $sw2 filter replace dev vx0 ingress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac enc_key_id 10010 enc_dst_ip 192.0.2.34 action pass"
+
+	# First, check that redirection works.
+	run_cmd "bridge -n $sw1 link set dev swp1 backup_port vx0"
+	run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_port vx0\""
+	log_test $? 0 "vx0 configured as backup port of swp1"
+
+	run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 10"
+	run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid 10\""
+	log_test $? 0 "Valid nexthop as backup nexthop"
+
+	run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+	busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
+	log_test $? 0 "swp1 carrier off"
+
+	run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+	tc_check_packets $sw1 "dev swp1 egress" 101 0
+	log_test $? 0 "No forwarding out of swp1"
+	tc_check_packets $sw1 "dev vx0 egress" 101 1
+	log_test $? 0 "Forwarding out of vx0"
+	tc_check_packets $sw2 "dev vx0 ingress" 101 1
+	log_test $? 0 "Forwarding using backup nexthop ID"
+	run_cmd "ip -n $sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $tx_drop'"
+	log_test $? 0 "No Tx drop increase"
+
+	# Use a non-existent nexthop ID.
+	run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 20"
+	run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid 20\""
+	log_test $? 0 "Non-existent nexthop as backup nexthop"
+
+	run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+	tc_check_packets $sw1 "dev swp1 egress" 101 0
+	log_test $? 0 "No forwarding out of swp1"
+	tc_check_packets $sw1 "dev vx0 egress" 101 2
+	log_test $? 0 "Forwarding out of vx0"
+	tc_check_packets $sw2 "dev vx0 ingress" 101 1
+	log_test $? 0 "No forwarding using backup nexthop ID"
+	run_cmd "ip -n $sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 1))'"
+	log_test $? 0 "Tx drop increased"
+
+	# Use a blckhole nexthop.
+	run_cmd "ip -n $sw1 nexthop replace id 30 blackhole"
+	run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 30"
+	run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid 30\""
+	log_test $? 0 "Blackhole nexthop as backup nexthop"
+
+	run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+	tc_check_packets $sw1 "dev swp1 egress" 101 0
+	log_test $? 0 "No forwarding out of swp1"
+	tc_check_packets $sw1 "dev vx0 egress" 101 3
+	log_test $? 0 "Forwarding out of vx0"
+	tc_check_packets $sw2 "dev vx0 ingress" 101 1
+	log_test $? 0 "No forwarding using backup nexthop ID"
+	run_cmd "ip -n $sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 2))'"
+	log_test $? 0 "Tx drop increased"
+
+	# Non-group FDB nexthop.
+	run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 1"
+	run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid 1\""
+	log_test $? 0 "Non-group FDB nexthop as backup nexthop"
+
+	run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+	tc_check_packets $sw1 "dev swp1 egress" 101 0
+	log_test $? 0 "No forwarding out of swp1"
+	tc_check_packets $sw1 "dev vx0 egress" 101 4
+	log_test $? 0 "Forwarding out of vx0"
+	tc_check_packets $sw2 "dev vx0 ingress" 101 1
+	log_test $? 0 "No forwarding using backup nexthop ID"
+	run_cmd "ip -n $sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 3))'"
+	log_test $? 0 "Tx drop increased"
+
+	# IPv6 address family nexthop.
+	run_cmd "ip -n $sw1 nexthop replace id 100 via 2001:db8:100::1 fdb"
+	run_cmd "ip -n $sw1 nexthop replace id 200 via 2001:db8:100::1 fdb"
+	run_cmd "ip -n $sw1 nexthop replace id 300 group 100/200 fdb"
+	run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 300"
+	run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid 300\""
+	log_test $? 0 "IPv6 address family nexthop as backup nexthop"
+
+	run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+	tc_check_packets $sw1 "dev swp1 egress" 101 0
+	log_test $? 0 "No forwarding out of swp1"
+	tc_check_packets $sw1 "dev vx0 egress" 101 5
+	log_test $? 0 "Forwarding out of vx0"
+	tc_check_packets $sw2 "dev vx0 ingress" 101 1
+	log_test $? 0 "No forwarding using backup nexthop ID"
+	run_cmd "ip -n $sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 4))'"
+	log_test $? 0 "Tx drop increased"
+}
+
+backup_nhid_ping()
+{
+	local sw1_mac
+	local sw2_mac
+
+	echo
+	echo "Backup nexthop ID - ping"
+	echo "------------------------"
+
+	# Test bidirectional traffic when traffic is redirected in both VTEPs.
+	sw1_mac=$(ip -n $sw1 -j -p link show br0.10 | jq -r '.[]["address"]')
+	sw2_mac=$(ip -n $sw2 -j -p link show br0.10 | jq -r '.[]["address"]')
+
+	run_cmd "bridge -n $sw1 fdb replace $sw2_mac dev swp1 master static vlan 10"
+	run_cmd "bridge -n $sw2 fdb replace $sw1_mac dev swp1 master static vlan 10"
+
+	run_cmd "ip -n $sw1 neigh replace 192.0.2.66 lladdr $sw2_mac nud perm dev br0.10"
+	run_cmd "ip -n $sw2 neigh replace 192.0.2.65 lladdr $sw1_mac nud perm dev br0.10"
+
+	run_cmd "ip -n $sw1 nexthop replace id 1 via 192.0.2.34 fdb"
+	run_cmd "ip -n $sw2 nexthop replace id 1 via 192.0.2.33 fdb"
+	run_cmd "ip -n $sw1 nexthop replace id 10 group 1 fdb"
+	run_cmd "ip -n $sw2 nexthop replace id 10 group 1 fdb"
+
+	run_cmd "bridge -n $sw1 link set dev swp1 backup_port vx0"
+	run_cmd "bridge -n $sw2 link set dev swp1 backup_port vx0"
+	run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 10"
+	run_cmd "bridge -n $sw2 link set dev swp1 backup_nhid 10"
+
+	run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+	busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
+	run_cmd "ip -n $sw2 link set dev swp1 carrier off"
+	busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw2 swp1 disabled
+
+	run_cmd "ip netns exec $sw1 ping -i 0.1 -c 10 -w $PING_TIMEOUT 192.0.2.66"
+	log_test $? 0 "Ping with backup nexthop ID"
+
+	# Reset the backup nexthop ID to 0 and check that ping fails.
+	run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 0"
+	run_cmd "bridge -n $sw2 link set dev swp1 backup_nhid 0"
+
+	run_cmd "ip netns exec $sw1 ping -i 0.1 -c 10 -w $PING_TIMEOUT 192.0.2.66"
+	log_test $? 1 "Ping after disabling backup nexthop ID"
+}
+
+backup_nhid_add_del_loop()
+{
+	while true; do
+		ip -n $sw1 nexthop del id 10
+		ip -n $sw1 nexthop replace id 10 group 1/2 fdb
+	done >/dev/null 2>&1
+}
+
+backup_nhid_torture()
+{
+	local dmac=00:11:22:33:44:55
+	local smac=00:aa:bb:cc:dd:ee
+	local pid1
+	local pid2
+	local pid3
+
+	echo
+	echo "Backup nexthop ID - torture test"
+	echo "--------------------------------"
+
+	# Continuously send traffic through the backup nexthop while adding and
+	# deleting the group. The test is considered successful if nothing
+	# crashed.
+
+	run_cmd "ip -n $sw1 nexthop replace id 1 via 192.0.2.34 fdb"
+	run_cmd "ip -n $sw1 nexthop replace id 2 via 192.0.2.34 fdb"
+	run_cmd "ip -n $sw1 nexthop replace id 10 group 1/2 fdb"
+
+	run_cmd "bridge -n $sw1 fdb replace $dmac dev swp1 master static vlan 10"
+
+	run_cmd "bridge -n $sw1 link set dev swp1 backup_port vx0"
+	run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 10"
+	run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+
+	backup_nhid_add_del_loop &
+	pid1=$!
+	ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 0 &
+	pid2=$!
+
+	sleep 30
+	kill -9 $pid1 $pid2
+	wait $pid1 $pid2 2>/dev/null
+
+	log_test 0 0 "Torture test"
+}
+
+################################################################################
+# Usage
+
+usage()
+{
+	cat <<EOF
+usage: ${0##*/} OPTS
+
+        -t <test>   Test(s) to run (default: all)
+                    (options: $TESTS)
+        -p          Pause on fail
+        -P          Pause after each test before cleanup
+        -v          Verbose mode (show commands and output)
+        -w          Timeout for ping
+EOF
+}
+
+################################################################################
+# Main
+
+trap cleanup EXIT
+
+while getopts ":t:pPvhw:" opt; do
+	case $opt in
+		t) TESTS=$OPTARG;;
+		p) PAUSE_ON_FAIL=yes;;
+		P) PAUSE=yes;;
+		v) VERBOSE=$(($VERBOSE + 1));;
+		w) PING_TIMEOUT=$OPTARG;;
+		h) usage; exit 0;;
+		*) usage; exit 1;;
+	esac
+done
+
+# Make sure we don't pause twice.
+[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no
+
+if [ "$(id -u)" -ne 0 ];then
+	echo "SKIP: Need root privileges"
+	exit $ksft_skip;
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v bridge)" ]; then
+	echo "SKIP: Could not run test without bridge tool"
+	exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v tc)" ]; then
+	echo "SKIP: Could not run test without tc tool"
+	exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v mausezahn)" ]; then
+	echo "SKIP: Could not run test without mausezahn tool"
+	exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v jq)" ]; then
+	echo "SKIP: Could not run test without jq tool"
+	exit $ksft_skip
+fi
+
+bridge link help 2>&1 | grep -q "backup_nhid"
+if [ $? -ne 0 ]; then
+   echo "SKIP: iproute2 bridge too old, missing backup nexthop ID support"
+   exit $ksft_skip
+fi
+
+# Start clean.
+cleanup
+
+for t in $TESTS
+do
+	setup; $t; cleanup;
+done
+
+if [ "$TESTS" != "none" ]; then
+	printf "\nTests passed: %3d\n" ${nsuccess}
+	printf "Tests failed: %3d\n"   ${nfail}
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/test_bridge_neigh_suppress.sh b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh
new file mode 100755
index 000000000000..8533393a4f18
--- /dev/null
+++ b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh
@@ -0,0 +1,855 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test is for checking bridge neighbor suppression functionality. The
+# topology consists of two bridges (VTEPs) connected using VXLAN. A single
+# host is connected to each bridge over multiple VLANs. The test checks that
+# ARP/NS messages from the first host are suppressed on the VXLAN port when
+# should.
+#
+# +-----------------------+              +------------------------+
+# | h1                    |              | h2                     |
+# |                       |              |                        |
+# | + eth0.10             |              | + eth0.10              |
+# | | 192.0.2.1/28        |              | | 192.0.2.2/28         |
+# | | 2001:db8:1::1/64    |              | | 2001:db8:1::2/64     |
+# | |                     |              | |                      |
+# | |  + eth0.20          |              | |  + eth0.20           |
+# | \  | 192.0.2.17/28    |              | \  | 192.0.2.18/28     |
+# |  \ | 2001:db8:2::1/64 |              |  \ | 2001:db8:2::2/64  |
+# |   \|                  |              |   \|                   |
+# |    + eth0             |              |    + eth0              |
+# +----|------------------+              +----|-------------------+
+#      |                                      |
+#      |                                      |
+# +----|-------------------------------+ +----|-------------------------------+
+# |    + swp1                   + vx0  | |    + swp1                   + vx0  |
+# |    |                        |      | |    |                        |      |
+# |    |           br0          |      | |    |                        |      |
+# |    +------------+-----------+      | |    +------------+-----------+      |
+# |                 |                  | |                 |                  |
+# |                 |                  | |                 |                  |
+# |             +---+---+              | |             +---+---+              |
+# |             |       |              | |             |       |              |
+# |             |       |              | |             |       |              |
+# |             +       +              | |             +       +              |
+# |          br0.10  br0.20            | |          br0.10  br0.20            |
+# |                                    | |                                    |
+# |                 192.0.2.33         | |                 192.0.2.34         |
+# |                 + lo               | |                 + lo               |
+# |                                    | |                                    |
+# |                                    | |                                    |
+# |                   192.0.2.49/28    | |    192.0.2.50/28                   |
+# |                           veth0 +-------+ veth0                           |
+# |                                    | |                                    |
+# | sw1                                | | sw2                                |
+# +------------------------------------+ +------------------------------------+
+
+source lib.sh
+ret=0
+
+# All tests in this script. Can be overridden with -t option.
+TESTS="
+	neigh_suppress_arp
+	neigh_suppress_ns
+	neigh_vlan_suppress_arp
+	neigh_vlan_suppress_ns
+"
+VERBOSE=0
+PAUSE_ON_FAIL=no
+PAUSE=no
+
+################################################################################
+# Utilities
+
+log_test()
+{
+	local rc=$1
+	local expected=$2
+	local msg="$3"
+
+	if [ ${rc} -eq ${expected} ]; then
+		printf "TEST: %-60s  [ OK ]\n" "${msg}"
+		nsuccess=$((nsuccess+1))
+	else
+		ret=1
+		nfail=$((nfail+1))
+		printf "TEST: %-60s  [FAIL]\n" "${msg}"
+		if [ "$VERBOSE" = "1" ]; then
+			echo "    rc=$rc, expected $expected"
+		fi
+
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+		echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	fi
+
+	if [ "${PAUSE}" = "yes" ]; then
+		echo
+		echo "hit enter to continue, 'q' to quit"
+		read a
+		[ "$a" = "q" ] && exit 1
+	fi
+
+	[ "$VERBOSE" = "1" ] && echo
+}
+
+run_cmd()
+{
+	local cmd="$1"
+	local out
+	local stderr="2>/dev/null"
+
+	if [ "$VERBOSE" = "1" ]; then
+		printf "COMMAND: $cmd\n"
+		stderr=
+	fi
+
+	out=$(eval $cmd $stderr)
+	rc=$?
+	if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+		echo "    $out"
+	fi
+
+	return $rc
+}
+
+tc_check_packets()
+{
+	local ns=$1; shift
+	local id=$1; shift
+	local handle=$1; shift
+	local count=$1; shift
+	local pkts
+
+	sleep 0.1
+	pkts=$(tc -n $ns -j -s filter show $id \
+		| jq ".[] | select(.options.handle == $handle) | \
+		.options.actions[0].stats.packets")
+	[[ $pkts == $count ]]
+}
+
+################################################################################
+# Setup
+
+setup_topo_ns()
+{
+	local ns=$1; shift
+
+	ip netns exec $ns sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1
+	ip netns exec $ns sysctl -qw net.ipv6.conf.default.ignore_routes_with_linkdown=1
+	ip netns exec $ns sysctl -qw net.ipv6.conf.all.accept_dad=0
+	ip netns exec $ns sysctl -qw net.ipv6.conf.default.accept_dad=0
+}
+
+setup_topo()
+{
+	local ns
+
+	setup_ns h1 h2 sw1 sw2
+	for ns in $h1 $h2 $sw1 $sw2; do
+		setup_topo_ns $ns
+	done
+
+	ip link add name veth0 type veth peer name veth1
+	ip link set dev veth0 netns $h1 name eth0
+	ip link set dev veth1 netns $sw1 name swp1
+
+	ip link add name veth0 type veth peer name veth1
+	ip link set dev veth0 netns $sw1 name veth0
+	ip link set dev veth1 netns $sw2 name veth0
+
+	ip link add name veth0 type veth peer name veth1
+	ip link set dev veth0 netns $h2 name eth0
+	ip link set dev veth1 netns $sw2 name swp1
+}
+
+setup_host_common()
+{
+	local ns=$1; shift
+	local v4addr1=$1; shift
+	local v4addr2=$1; shift
+	local v6addr1=$1; shift
+	local v6addr2=$1; shift
+
+	ip -n $ns link set dev eth0 up
+	ip -n $ns link add link eth0 name eth0.10 up type vlan id 10
+	ip -n $ns link add link eth0 name eth0.20 up type vlan id 20
+
+	ip -n $ns address add $v4addr1 dev eth0.10
+	ip -n $ns address add $v4addr2 dev eth0.20
+	ip -n $ns address add $v6addr1 dev eth0.10
+	ip -n $ns address add $v6addr2 dev eth0.20
+}
+
+setup_h1()
+{
+	local ns=$h1
+	local v4addr1=192.0.2.1/28
+	local v4addr2=192.0.2.17/28
+	local v6addr1=2001:db8:1::1/64
+	local v6addr2=2001:db8:2::1/64
+
+	setup_host_common $ns $v4addr1 $v4addr2 $v6addr1 $v6addr2
+}
+
+setup_h2()
+{
+	local ns=$h2
+	local v4addr1=192.0.2.2/28
+	local v4addr2=192.0.2.18/28
+	local v6addr1=2001:db8:1::2/64
+	local v6addr2=2001:db8:2::2/64
+
+	setup_host_common $ns $v4addr1 $v4addr2 $v6addr1 $v6addr2
+}
+
+setup_sw_common()
+{
+	local ns=$1; shift
+	local local_addr=$1; shift
+	local remote_addr=$1; shift
+	local veth_addr=$1; shift
+	local gw_addr=$1; shift
+
+	ip -n $ns address add $local_addr/32 dev lo
+
+	ip -n $ns link set dev veth0 up
+	ip -n $ns address add $veth_addr/28 dev veth0
+	ip -n $ns route add default via $gw_addr
+
+	ip -n $ns link add name br0 up type bridge vlan_filtering 1 \
+		vlan_default_pvid 0 mcast_snooping 0
+
+	ip -n $ns link add link br0 name br0.10 up type vlan id 10
+	bridge -n $ns vlan add vid 10 dev br0 self
+
+	ip -n $ns link add link br0 name br0.20 up type vlan id 20
+	bridge -n $ns vlan add vid 20 dev br0 self
+
+	ip -n $ns link set dev swp1 up master br0
+	bridge -n $ns vlan add vid 10 dev swp1
+	bridge -n $ns vlan add vid 20 dev swp1
+
+	ip -n $ns link add name vx0 up master br0 type vxlan \
+		local $local_addr dstport 4789 nolearning external
+	bridge -n $ns fdb add 00:00:00:00:00:00 dev vx0 self static \
+		dst $remote_addr src_vni 10010
+	bridge -n $ns fdb add 00:00:00:00:00:00 dev vx0 self static \
+		dst $remote_addr src_vni 10020
+	bridge -n $ns link set dev vx0 vlan_tunnel on learning off
+
+	bridge -n $ns vlan add vid 10 dev vx0
+	bridge -n $ns vlan add vid 10 dev vx0 tunnel_info id 10010
+
+	bridge -n $ns vlan add vid 20 dev vx0
+	bridge -n $ns vlan add vid 20 dev vx0 tunnel_info id 10020
+}
+
+setup_sw1()
+{
+	local ns=$sw1
+	local local_addr=192.0.2.33
+	local remote_addr=192.0.2.34
+	local veth_addr=192.0.2.49
+	local gw_addr=192.0.2.50
+
+	setup_sw_common $ns $local_addr $remote_addr $veth_addr $gw_addr
+}
+
+setup_sw2()
+{
+	local ns=$sw2
+	local local_addr=192.0.2.34
+	local remote_addr=192.0.2.33
+	local veth_addr=192.0.2.50
+	local gw_addr=192.0.2.49
+
+	setup_sw_common $ns $local_addr $remote_addr $veth_addr $gw_addr
+}
+
+setup()
+{
+	set -e
+
+	setup_topo
+	setup_h1
+	setup_h2
+	setup_sw1
+	setup_sw2
+
+	sleep 5
+
+	set +e
+}
+
+cleanup()
+{
+	cleanup_ns $h1 $h2 $sw1 $sw2
+}
+
+################################################################################
+# Tests
+
+neigh_suppress_arp_common()
+{
+	local vid=$1; shift
+	local sip=$1; shift
+	local tip=$1; shift
+	local h2_mac
+
+	echo
+	echo "Per-port ARP suppression - VLAN $vid"
+	echo "----------------------------------"
+
+	run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact"
+	run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto 0x0806 flower indev swp1 arp_tip $tip arp_sip $sip arp_op request action pass"
+
+	# Initial state - check that ARP requests are not suppressed and that
+	# ARP replies are received.
+	run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip"
+	log_test $? 0 "arping"
+	tc_check_packets $sw1 "dev vx0 egress" 101 1
+	log_test $? 0 "ARP suppression"
+
+	# Enable neighbor suppression and check that nothing changes compared
+	# to the initial state.
+	run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on"
+	run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\""
+	log_test $? 0 "\"neigh_suppress\" is on"
+
+	run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip"
+	log_test $? 0 "arping"
+	tc_check_packets $sw1 "dev vx0 egress" 101 2
+	log_test $? 0 "ARP suppression"
+
+	# Install an FDB entry for the remote host and check that nothing
+	# changes compared to the initial state.
+	h2_mac=$(ip -n $h2 -j -p link show eth0.$vid | jq -r '.[]["address"]')
+	run_cmd "bridge -n $sw1 fdb replace $h2_mac dev vx0 master static vlan $vid"
+	log_test $? 0 "FDB entry installation"
+
+	run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip"
+	log_test $? 0 "arping"
+	tc_check_packets $sw1 "dev vx0 egress" 101 3
+	log_test $? 0 "ARP suppression"
+
+	# Install a neighbor on the matching SVI interface and check that ARP
+	# requests are suppressed.
+	run_cmd "ip -n $sw1 neigh replace $tip lladdr $h2_mac nud permanent dev br0.$vid"
+	log_test $? 0 "Neighbor entry installation"
+
+	run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip"
+	log_test $? 0 "arping"
+	tc_check_packets $sw1 "dev vx0 egress" 101 3
+	log_test $? 0 "ARP suppression"
+
+	# Take the second host down and check that ARP requests are suppressed
+	# and that ARP replies are received.
+	run_cmd "ip -n $h2 link set dev eth0.$vid down"
+	log_test $? 0 "H2 down"
+
+	run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip"
+	log_test $? 0 "arping"
+	tc_check_packets $sw1 "dev vx0 egress" 101 3
+	log_test $? 0 "ARP suppression"
+
+	run_cmd "ip -n $h2 link set dev eth0.$vid up"
+	log_test $? 0 "H2 up"
+
+	# Disable neighbor suppression and check that ARP requests are no
+	# longer suppressed.
+	run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress off"
+	run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress off\""
+	log_test $? 0 "\"neigh_suppress\" is off"
+
+	run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip"
+	log_test $? 0 "arping"
+	tc_check_packets $sw1 "dev vx0 egress" 101 4
+	log_test $? 0 "ARP suppression"
+
+	# Take the second host down and check that ARP requests are not
+	# suppressed and that ARP replies are not received.
+	run_cmd "ip -n $h2 link set dev eth0.$vid down"
+	log_test $? 0 "H2 down"
+
+	run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip"
+	log_test $? 1 "arping"
+	tc_check_packets $sw1 "dev vx0 egress" 101 5
+	log_test $? 0 "ARP suppression"
+}
+
+neigh_suppress_arp()
+{
+	local vid=10
+	local sip=192.0.2.1
+	local tip=192.0.2.2
+
+	neigh_suppress_arp_common $vid $sip $tip
+
+	vid=20
+	sip=192.0.2.17
+	tip=192.0.2.18
+	neigh_suppress_arp_common $vid $sip $tip
+}
+
+neigh_suppress_ns_common()
+{
+	local vid=$1; shift
+	local saddr=$1; shift
+	local daddr=$1; shift
+	local maddr=$1; shift
+	local h2_mac
+
+	echo
+	echo "Per-port NS suppression - VLAN $vid"
+	echo "---------------------------------"
+
+	run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact"
+	run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto ipv6 flower indev swp1 ip_proto icmpv6 dst_ip $maddr src_ip $saddr type 135 code 0 action pass"
+
+	# Initial state - check that NS messages are not suppressed and that ND
+	# messages are received.
+	run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid"
+	log_test $? 0 "ndisc6"
+	tc_check_packets $sw1 "dev vx0 egress" 101 1
+	log_test $? 0 "NS suppression"
+
+	# Enable neighbor suppression and check that nothing changes compared
+	# to the initial state.
+	run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on"
+	run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\""
+	log_test $? 0 "\"neigh_suppress\" is on"
+
+	run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid"
+	log_test $? 0 "ndisc6"
+	tc_check_packets $sw1 "dev vx0 egress" 101 2
+	log_test $? 0 "NS suppression"
+
+	# Install an FDB entry for the remote host and check that nothing
+	# changes compared to the initial state.
+	h2_mac=$(ip -n $h2 -j -p link show eth0.$vid | jq -r '.[]["address"]')
+	run_cmd "bridge -n $sw1 fdb replace $h2_mac dev vx0 master static vlan $vid"
+	log_test $? 0 "FDB entry installation"
+
+	run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid"
+	log_test $? 0 "ndisc6"
+	tc_check_packets $sw1 "dev vx0 egress" 101 3
+	log_test $? 0 "NS suppression"
+
+	# Install a neighbor on the matching SVI interface and check that NS
+	# messages are suppressed.
+	run_cmd "ip -n $sw1 neigh replace $daddr lladdr $h2_mac nud permanent dev br0.$vid"
+	log_test $? 0 "Neighbor entry installation"
+
+	run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid"
+	log_test $? 0 "ndisc6"
+	tc_check_packets $sw1 "dev vx0 egress" 101 3
+	log_test $? 0 "NS suppression"
+
+	# Take the second host down and check that NS messages are suppressed
+	# and that ND messages are received.
+	run_cmd "ip -n $h2 link set dev eth0.$vid down"
+	log_test $? 0 "H2 down"
+
+	run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid"
+	log_test $? 0 "ndisc6"
+	tc_check_packets $sw1 "dev vx0 egress" 101 3
+	log_test $? 0 "NS suppression"
+
+	run_cmd "ip -n $h2 link set dev eth0.$vid up"
+	log_test $? 0 "H2 up"
+
+	# Disable neighbor suppression and check that NS messages are no longer
+	# suppressed.
+	run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress off"
+	run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress off\""
+	log_test $? 0 "\"neigh_suppress\" is off"
+
+	run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid"
+	log_test $? 0 "ndisc6"
+	tc_check_packets $sw1 "dev vx0 egress" 101 4
+	log_test $? 0 "NS suppression"
+
+	# Take the second host down and check that NS messages are not
+	# suppressed and that ND messages are not received.
+	run_cmd "ip -n $h2 link set dev eth0.$vid down"
+	log_test $? 0 "H2 down"
+
+	run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid"
+	log_test $? 2 "ndisc6"
+	tc_check_packets $sw1 "dev vx0 egress" 101 5
+	log_test $? 0 "NS suppression"
+}
+
+neigh_suppress_ns()
+{
+	local vid=10
+	local saddr=2001:db8:1::1
+	local daddr=2001:db8:1::2
+	local maddr=ff02::1:ff00:2
+
+	neigh_suppress_ns_common $vid $saddr $daddr $maddr
+
+	vid=20
+	saddr=2001:db8:2::1
+	daddr=2001:db8:2::2
+	maddr=ff02::1:ff00:2
+
+	neigh_suppress_ns_common $vid $saddr $daddr $maddr
+}
+
+neigh_vlan_suppress_arp()
+{
+	local vid1=10
+	local vid2=20
+	local sip1=192.0.2.1
+	local sip2=192.0.2.17
+	local tip1=192.0.2.2
+	local tip2=192.0.2.18
+	local h2_mac1
+	local h2_mac2
+
+	echo
+	echo "Per-{Port, VLAN} ARP suppression"
+	echo "--------------------------------"
+
+	run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact"
+	run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto 0x0806 flower indev swp1 arp_tip $tip1 arp_sip $sip1 arp_op request action pass"
+	run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 102 proto 0x0806 flower indev swp1 arp_tip $tip2 arp_sip $sip2 arp_op request action pass"
+
+	h2_mac1=$(ip -n $h2 -j -p link show eth0.$vid1 | jq -r '.[]["address"]')
+	h2_mac2=$(ip -n $h2 -j -p link show eth0.$vid2 | jq -r '.[]["address"]')
+	run_cmd "bridge -n $sw1 fdb replace $h2_mac1 dev vx0 master static vlan $vid1"
+	run_cmd "bridge -n $sw1 fdb replace $h2_mac2 dev vx0 master static vlan $vid2"
+	run_cmd "ip -n $sw1 neigh replace $tip1 lladdr $h2_mac1 nud permanent dev br0.$vid1"
+	run_cmd "ip -n $sw1 neigh replace $tip2 lladdr $h2_mac2 nud permanent dev br0.$vid2"
+
+	# Enable per-{Port, VLAN} neighbor suppression and check that ARP
+	# requests are not suppressed and that ARP replies are received.
+	run_cmd "bridge -n $sw1 link set dev vx0 neigh_vlan_suppress on"
+	run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress on\""
+	log_test $? 0 "\"neigh_vlan_suppress\" is on"
+
+	run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1"
+	log_test $? 0 "arping (VLAN $vid1)"
+	run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2"
+	log_test $? 0 "arping (VLAN $vid2)"
+
+	tc_check_packets $sw1 "dev vx0 egress" 101 1
+	log_test $? 0 "ARP suppression (VLAN $vid1)"
+	tc_check_packets $sw1 "dev vx0 egress" 102 1
+	log_test $? 0 "ARP suppression (VLAN $vid2)"
+
+	# Enable neighbor suppression on VLAN 10 and check that only on this
+	# VLAN ARP requests are suppressed.
+	run_cmd "bridge -n $sw1 vlan set vid $vid1 dev vx0 neigh_suppress on"
+	run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress on\""
+	log_test $? 0 "\"neigh_suppress\" is on (VLAN $vid1)"
+	run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid2 | grep \"neigh_suppress off\""
+	log_test $? 0 "\"neigh_suppress\" is off (VLAN $vid2)"
+
+	run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1"
+	log_test $? 0 "arping (VLAN $vid1)"
+	run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2"
+	log_test $? 0 "arping (VLAN $vid2)"
+
+	tc_check_packets $sw1 "dev vx0 egress" 101 1
+	log_test $? 0 "ARP suppression (VLAN $vid1)"
+	tc_check_packets $sw1 "dev vx0 egress" 102 2
+	log_test $? 0 "ARP suppression (VLAN $vid2)"
+
+	# Enable neighbor suppression on the port and check that it has no
+	# effect compared to previous state.
+	run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on"
+	run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\""
+	log_test $? 0 "\"neigh_suppress\" is on"
+
+	run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1"
+	log_test $? 0 "arping (VLAN $vid1)"
+	run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2"
+	log_test $? 0 "arping (VLAN $vid2)"
+
+	tc_check_packets $sw1 "dev vx0 egress" 101 1
+	log_test $? 0 "ARP suppression (VLAN $vid1)"
+	tc_check_packets $sw1 "dev vx0 egress" 102 3
+	log_test $? 0 "ARP suppression (VLAN $vid2)"
+
+	# Disable neighbor suppression on the port and check that it has no
+	# effect compared to previous state.
+	run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress off"
+	run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress off\""
+	log_test $? 0 "\"neigh_suppress\" is off"
+
+	run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1"
+	log_test $? 0 "arping (VLAN $vid1)"
+	run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2"
+	log_test $? 0 "arping (VLAN $vid2)"
+
+	tc_check_packets $sw1 "dev vx0 egress" 101 1
+	log_test $? 0 "ARP suppression (VLAN $vid1)"
+	tc_check_packets $sw1 "dev vx0 egress" 102 4
+	log_test $? 0 "ARP suppression (VLAN $vid2)"
+
+	# Disable neighbor suppression on VLAN 10 and check that ARP requests
+	# are no longer suppressed on this VLAN.
+	run_cmd "bridge -n $sw1 vlan set vid $vid1 dev vx0 neigh_suppress off"
+	run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress off\""
+	log_test $? 0 "\"neigh_suppress\" is off (VLAN $vid1)"
+
+	run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1"
+	log_test $? 0 "arping (VLAN $vid1)"
+	run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2"
+	log_test $? 0 "arping (VLAN $vid2)"
+
+	tc_check_packets $sw1 "dev vx0 egress" 101 2
+	log_test $? 0 "ARP suppression (VLAN $vid1)"
+	tc_check_packets $sw1 "dev vx0 egress" 102 5
+	log_test $? 0 "ARP suppression (VLAN $vid2)"
+
+	# Disable per-{Port, VLAN} neighbor suppression, enable neighbor
+	# suppression on the port and check that on both VLANs ARP requests are
+	# suppressed.
+	run_cmd "bridge -n $sw1 link set dev vx0 neigh_vlan_suppress off"
+	run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress off\""
+	log_test $? 0 "\"neigh_vlan_suppress\" is off"
+
+	run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on"
+	run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\""
+	log_test $? 0 "\"neigh_suppress\" is on"
+
+	run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1"
+	log_test $? 0 "arping (VLAN $vid1)"
+	run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2"
+	log_test $? 0 "arping (VLAN $vid2)"
+
+	tc_check_packets $sw1 "dev vx0 egress" 101 2
+	log_test $? 0 "ARP suppression (VLAN $vid1)"
+	tc_check_packets $sw1 "dev vx0 egress" 102 5
+	log_test $? 0 "ARP suppression (VLAN $vid2)"
+}
+
+neigh_vlan_suppress_ns()
+{
+	local vid1=10
+	local vid2=20
+	local saddr1=2001:db8:1::1
+	local saddr2=2001:db8:2::1
+	local daddr1=2001:db8:1::2
+	local daddr2=2001:db8:2::2
+	local maddr=ff02::1:ff00:2
+	local h2_mac1
+	local h2_mac2
+
+	echo
+	echo "Per-{Port, VLAN} NS suppression"
+	echo "-------------------------------"
+
+	run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact"
+	run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto ipv6 flower indev swp1 ip_proto icmpv6 dst_ip $maddr src_ip $saddr1 type 135 code 0 action pass"
+	run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 102 proto ipv6 flower indev swp1 ip_proto icmpv6 dst_ip $maddr src_ip $saddr2 type 135 code 0 action pass"
+
+	h2_mac1=$(ip -n $h2 -j -p link show eth0.$vid1 | jq -r '.[]["address"]')
+	h2_mac2=$(ip -n $h2 -j -p link show eth0.$vid2 | jq -r '.[]["address"]')
+	run_cmd "bridge -n $sw1 fdb replace $h2_mac1 dev vx0 master static vlan $vid1"
+	run_cmd "bridge -n $sw1 fdb replace $h2_mac2 dev vx0 master static vlan $vid2"
+	run_cmd "ip -n $sw1 neigh replace $daddr1 lladdr $h2_mac1 nud permanent dev br0.$vid1"
+	run_cmd "ip -n $sw1 neigh replace $daddr2 lladdr $h2_mac2 nud permanent dev br0.$vid2"
+
+	# Enable per-{Port, VLAN} neighbor suppression and check that NS
+	# messages are not suppressed and that ND messages are received.
+	run_cmd "bridge -n $sw1 link set dev vx0 neigh_vlan_suppress on"
+	run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress on\""
+	log_test $? 0 "\"neigh_vlan_suppress\" is on"
+
+	run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1"
+	log_test $? 0 "ndisc6 (VLAN $vid1)"
+	run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2"
+	log_test $? 0 "ndisc6 (VLAN $vid2)"
+
+	tc_check_packets $sw1 "dev vx0 egress" 101 1
+	log_test $? 0 "NS suppression (VLAN $vid1)"
+	tc_check_packets $sw1 "dev vx0 egress" 102 1
+	log_test $? 0 "NS suppression (VLAN $vid2)"
+
+	# Enable neighbor suppression on VLAN 10 and check that only on this
+	# VLAN NS messages are suppressed.
+	run_cmd "bridge -n $sw1 vlan set vid $vid1 dev vx0 neigh_suppress on"
+	run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress on\""
+	log_test $? 0 "\"neigh_suppress\" is on (VLAN $vid1)"
+	run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid2 | grep \"neigh_suppress off\""
+	log_test $? 0 "\"neigh_suppress\" is off (VLAN $vid2)"
+
+	run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1"
+	log_test $? 0 "ndisc6 (VLAN $vid1)"
+	run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2"
+	log_test $? 0 "ndisc6 (VLAN $vid2)"
+
+	tc_check_packets $sw1 "dev vx0 egress" 101 1
+	log_test $? 0 "NS suppression (VLAN $vid1)"
+	tc_check_packets $sw1 "dev vx0 egress" 102 2
+	log_test $? 0 "NS suppression (VLAN $vid2)"
+
+	# Enable neighbor suppression on the port and check that it has no
+	# effect compared to previous state.
+	run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on"
+	run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\""
+	log_test $? 0 "\"neigh_suppress\" is on"
+
+	run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1"
+	log_test $? 0 "ndisc6 (VLAN $vid1)"
+	run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2"
+	log_test $? 0 "ndisc6 (VLAN $vid2)"
+
+	tc_check_packets $sw1 "dev vx0 egress" 101 1
+	log_test $? 0 "NS suppression (VLAN $vid1)"
+	tc_check_packets $sw1 "dev vx0 egress" 102 3
+	log_test $? 0 "NS suppression (VLAN $vid2)"
+
+	# Disable neighbor suppression on the port and check that it has no
+	# effect compared to previous state.
+	run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress off"
+	run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress off\""
+	log_test $? 0 "\"neigh_suppress\" is off"
+
+	run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1"
+	log_test $? 0 "ndisc6 (VLAN $vid1)"
+	run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2"
+	log_test $? 0 "ndisc6 (VLAN $vid2)"
+
+	tc_check_packets $sw1 "dev vx0 egress" 101 1
+	log_test $? 0 "NS suppression (VLAN $vid1)"
+	tc_check_packets $sw1 "dev vx0 egress" 102 4
+	log_test $? 0 "NS suppression (VLAN $vid2)"
+
+	# Disable neighbor suppression on VLAN 10 and check that NS messages
+	# are no longer suppressed on this VLAN.
+	run_cmd "bridge -n $sw1 vlan set vid $vid1 dev vx0 neigh_suppress off"
+	run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress off\""
+	log_test $? 0 "\"neigh_suppress\" is off (VLAN $vid1)"
+
+	run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1"
+	log_test $? 0 "ndisc6 (VLAN $vid1)"
+	run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2"
+	log_test $? 0 "ndisc6 (VLAN $vid2)"
+
+	tc_check_packets $sw1 "dev vx0 egress" 101 2
+	log_test $? 0 "NS suppression (VLAN $vid1)"
+	tc_check_packets $sw1 "dev vx0 egress" 102 5
+	log_test $? 0 "NS suppression (VLAN $vid2)"
+
+	# Disable per-{Port, VLAN} neighbor suppression, enable neighbor
+	# suppression on the port and check that on both VLANs NS messages are
+	# suppressed.
+	run_cmd "bridge -n $sw1 link set dev vx0 neigh_vlan_suppress off"
+	run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress off\""
+	log_test $? 0 "\"neigh_vlan_suppress\" is off"
+
+	run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on"
+	run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\""
+	log_test $? 0 "\"neigh_suppress\" is on"
+
+	run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1"
+	log_test $? 0 "ndisc6 (VLAN $vid1)"
+	run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2"
+	log_test $? 0 "ndisc6 (VLAN $vid2)"
+
+	tc_check_packets $sw1 "dev vx0 egress" 101 2
+	log_test $? 0 "NS suppression (VLAN $vid1)"
+	tc_check_packets $sw1 "dev vx0 egress" 102 5
+	log_test $? 0 "NS suppression (VLAN $vid2)"
+}
+
+################################################################################
+# Usage
+
+usage()
+{
+	cat <<EOF
+usage: ${0##*/} OPTS
+
+        -t <test>   Test(s) to run (default: all)
+                    (options: $TESTS)
+        -p          Pause on fail
+        -P          Pause after each test before cleanup
+        -v          Verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
+# Main
+
+trap cleanup EXIT
+
+while getopts ":t:pPvh" opt; do
+	case $opt in
+		t) TESTS=$OPTARG;;
+		p) PAUSE_ON_FAIL=yes;;
+		P) PAUSE=yes;;
+		v) VERBOSE=$(($VERBOSE + 1));;
+		h) usage; exit 0;;
+		*) usage; exit 1;;
+	esac
+done
+
+# Make sure we don't pause twice.
+[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no
+
+if [ "$(id -u)" -ne 0 ];then
+	echo "SKIP: Need root privileges"
+	exit $ksft_skip;
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v bridge)" ]; then
+	echo "SKIP: Could not run test without bridge tool"
+	exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v tc)" ]; then
+	echo "SKIP: Could not run test without tc tool"
+	exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v arping)" ]; then
+	echo "SKIP: Could not run test without arping tool"
+	exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v ndisc6)" ]; then
+	echo "SKIP: Could not run test without ndisc6 tool"
+	exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v jq)" ]; then
+	echo "SKIP: Could not run test without jq tool"
+	exit $ksft_skip
+fi
+
+bridge link help 2>&1 | grep -q "neigh_vlan_suppress"
+if [ $? -ne 0 ]; then
+   echo "SKIP: iproute2 bridge too old, missing per-VLAN neighbor suppression support"
+   exit $ksft_skip
+fi
+
+# Start clean.
+cleanup
+
+for t in $TESTS
+do
+	setup; $t; cleanup;
+done
+
+if [ "$TESTS" != "none" ]; then
+	printf "\nTests passed: %3d\n" ${nsuccess}
+	printf "Tests failed: %3d\n"   ${nfail}
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/test_ingress_egress_chaining.sh b/tools/testing/selftests/net/test_ingress_egress_chaining.sh
new file mode 100644
index 000000000000..08adff6bb3b6
--- /dev/null
+++ b/tools/testing/selftests/net/test_ingress_egress_chaining.sh
@@ -0,0 +1,79 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test runs a simple ingress tc setup between two veth pairs,
+# and chains a single egress rule to test ingress chaining to egress.
+#
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+if [ "$(id -u)" -ne 0 ];then
+	echo "SKIP: Need root privileges"
+	exit $ksft_skip
+fi
+
+needed_mods="act_mirred cls_flower sch_ingress"
+for mod in $needed_mods; do
+	modinfo $mod &>/dev/null || { echo "SKIP: Need act_mirred module"; exit $ksft_skip; }
+done
+
+ns="ns$((RANDOM%899+100))"
+veth1="veth1$((RANDOM%899+100))"
+veth2="veth2$((RANDOM%899+100))"
+peer1="peer1$((RANDOM%899+100))"
+peer2="peer2$((RANDOM%899+100))"
+ip_peer1=198.51.100.5
+ip_peer2=198.51.100.6
+
+function fail() {
+	echo "FAIL: $@" >> /dev/stderr
+	exit 1
+}
+
+function cleanup() {
+	killall -q -9 udpgso_bench_rx
+	ip link del $veth1 &> /dev/null
+	ip link del $veth2 &> /dev/null
+	ip netns del $ns &> /dev/null
+}
+trap cleanup EXIT
+
+function config() {
+	echo "Setup veth pairs [$veth1, $peer1], and veth pair [$veth2, $peer2]"
+	ip link add $veth1 type veth peer name $peer1
+	ip link add $veth2 type veth peer name $peer2
+	ip addr add $ip_peer1/24 dev $peer1
+	ip link set $peer1 up
+	ip netns add $ns
+	ip link set dev $peer2 netns $ns
+	ip netns exec $ns ip addr add $ip_peer2/24 dev $peer2
+	ip netns exec $ns ip link set $peer2 up
+	ip link set $veth1 up
+	ip link set $veth2 up
+
+	echo "Add tc filter ingress->egress forwarding $veth1 <-> $veth2"
+	tc qdisc add dev $veth2 ingress
+	tc qdisc add dev $veth1 ingress
+	tc filter add dev $veth2 ingress prio 1 proto all flower \
+		action mirred egress redirect dev $veth1
+	tc filter add dev $veth1 ingress prio 1 proto all flower \
+		action mirred egress redirect dev $veth2
+
+	echo "Add tc filter egress->ingress forwarding $peer1 -> $veth1, bypassing the veth pipe"
+	tc qdisc add dev $peer1 clsact
+	tc filter add dev $peer1 egress prio 20 proto ip flower \
+		action mirred ingress redirect dev $veth1
+}
+
+function test_run() {
+	echo "Run tcp traffic"
+	./udpgso_bench_rx -t &
+	sleep 1
+	ip netns exec $ns timeout -k 2 10 ./udpgso_bench_tx -t -l 2 -4 -D $ip_peer1 || fail "traffic failed"
+	echo "Test passed"
+}
+
+config
+test_run
+trap - EXIT
+cleanup
diff --git a/tools/testing/selftests/net/test_vxlan_mdb.sh b/tools/testing/selftests/net/test_vxlan_mdb.sh
new file mode 100755
index 000000000000..74ff9fb2a6f0
--- /dev/null
+++ b/tools/testing/selftests/net/test_vxlan_mdb.sh
@@ -0,0 +1,2511 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test is for checking VXLAN MDB functionality. The topology consists of
+# two sets of namespaces: One for the testing of IPv4 underlay and another for
+# IPv6. In both cases, both IPv4 and IPv6 overlay traffic are tested.
+#
+# Data path functionality is tested by sending traffic from one of the upper
+# namespaces and checking using ingress tc filters that the expected traffic
+# was received by one of the lower namespaces.
+#
+# +------------------------------------+ +------------------------------------+
+# | ns1_v4                             | | ns1_v6                             |
+# |                                    | |                                    |
+# |    br0.10    br0.4000  br0.20      | |    br0.10    br0.4000  br0.20      |
+# |       +         +         +        | |       +         +         +        |
+# |       |         |         |        | |       |         |         |        |
+# |       |         |         |        | |       |         |         |        |
+# |       +---------+---------+        | |       +---------+---------+        |
+# |                 |                  | |                 |                  |
+# |                 |                  | |                 |                  |
+# |                 +                  | |                 +                  |
+# |                br0                 | |                br0                 |
+# |                 +                  | |                 +                  |
+# |                 |                  | |                 |                  |
+# |                 |                  | |                 |                  |
+# |                 +                  | |                 +                  |
+# |                vx0                 | |                vx0                 |
+# |                                    | |                                    |
+# |                                    | |                                    |
+# |               veth0                | |               veth0                |
+# |                 +                  | |                 +                  |
+# +-----------------|------------------+ +-----------------|------------------+
+#                   |                                      |
+# +-----------------|------------------+ +-----------------|------------------+
+# |                 +                  | |                 +                  |
+# |               veth0                | |               veth0                |
+# |                                    | |                                    |
+# |                                    | |                                    |
+# |                vx0                 | |                vx0                 |
+# |                 +                  | |                 +                  |
+# |                 |                  | |                 |                  |
+# |                 |                  | |                 |                  |
+# |                 +                  | |                 +                  |
+# |                br0                 | |                br0                 |
+# |                 +                  | |                 +                  |
+# |                 |                  | |                 |                  |
+# |                 |                  | |                 |                  |
+# |       +---------+---------+        | |       +---------+---------+        |
+# |       |         |         |        | |       |         |         |        |
+# |       |         |         |        | |       |         |         |        |
+# |       +         +         +        | |       +         +         +        |
+# |    br0.10    br0.4000  br0.10      | |    br0.10    br0.4000  br0.20      |
+# |                                    | |                                    |
+# | ns2_v4                             | | ns2_v6                             |
+# +------------------------------------+ +------------------------------------+
+
+source lib.sh
+ret=0
+
+CONTROL_PATH_TESTS="
+	basic_star_g_ipv4_ipv4
+	basic_star_g_ipv6_ipv4
+	basic_star_g_ipv4_ipv6
+	basic_star_g_ipv6_ipv6
+	basic_sg_ipv4_ipv4
+	basic_sg_ipv6_ipv4
+	basic_sg_ipv4_ipv6
+	basic_sg_ipv6_ipv6
+	star_g_ipv4_ipv4
+	star_g_ipv6_ipv4
+	star_g_ipv4_ipv6
+	star_g_ipv6_ipv6
+	sg_ipv4_ipv4
+	sg_ipv6_ipv4
+	sg_ipv4_ipv6
+	sg_ipv6_ipv6
+	dump_ipv4_ipv4
+	dump_ipv6_ipv4
+	dump_ipv4_ipv6
+	dump_ipv6_ipv6
+	flush
+"
+
+DATA_PATH_TESTS="
+	encap_params_ipv4_ipv4
+	encap_params_ipv6_ipv4
+	encap_params_ipv4_ipv6
+	encap_params_ipv6_ipv6
+	starg_exclude_ir_ipv4_ipv4
+	starg_exclude_ir_ipv6_ipv4
+	starg_exclude_ir_ipv4_ipv6
+	starg_exclude_ir_ipv6_ipv6
+	starg_include_ir_ipv4_ipv4
+	starg_include_ir_ipv6_ipv4
+	starg_include_ir_ipv4_ipv6
+	starg_include_ir_ipv6_ipv6
+	starg_exclude_p2mp_ipv4_ipv4
+	starg_exclude_p2mp_ipv6_ipv4
+	starg_exclude_p2mp_ipv4_ipv6
+	starg_exclude_p2mp_ipv6_ipv6
+	starg_include_p2mp_ipv4_ipv4
+	starg_include_p2mp_ipv6_ipv4
+	starg_include_p2mp_ipv4_ipv6
+	starg_include_p2mp_ipv6_ipv6
+	egress_vni_translation_ipv4_ipv4
+	egress_vni_translation_ipv6_ipv4
+	egress_vni_translation_ipv4_ipv6
+	egress_vni_translation_ipv6_ipv6
+	all_zeros_mdb_ipv4
+	all_zeros_mdb_ipv6
+	mdb_fdb_ipv4_ipv4
+	mdb_fdb_ipv6_ipv4
+	mdb_fdb_ipv4_ipv6
+	mdb_fdb_ipv6_ipv6
+	mdb_torture_ipv4_ipv4
+	mdb_torture_ipv6_ipv4
+	mdb_torture_ipv4_ipv6
+	mdb_torture_ipv6_ipv6
+"
+
+# All tests in this script. Can be overridden with -t option.
+TESTS="
+	$CONTROL_PATH_TESTS
+	$DATA_PATH_TESTS
+"
+VERBOSE=0
+PAUSE_ON_FAIL=no
+PAUSE=no
+
+################################################################################
+# Utilities
+
+log_test()
+{
+	local rc=$1
+	local expected=$2
+	local msg="$3"
+
+	if [ ${rc} -eq ${expected} ]; then
+		printf "TEST: %-60s  [ OK ]\n" "${msg}"
+		nsuccess=$((nsuccess+1))
+	else
+		ret=1
+		nfail=$((nfail+1))
+		printf "TEST: %-60s  [FAIL]\n" "${msg}"
+		if [ "$VERBOSE" = "1" ]; then
+			echo "    rc=$rc, expected $expected"
+		fi
+
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+		echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	fi
+
+	if [ "${PAUSE}" = "yes" ]; then
+		echo
+		echo "hit enter to continue, 'q' to quit"
+		read a
+		[ "$a" = "q" ] && exit 1
+	fi
+
+	[ "$VERBOSE" = "1" ] && echo
+}
+
+run_cmd()
+{
+	local cmd="$1"
+	local out
+	local stderr="2>/dev/null"
+
+	if [ "$VERBOSE" = "1" ]; then
+		printf "COMMAND: $cmd\n"
+		stderr=
+	fi
+
+	out=$(eval $cmd $stderr)
+	rc=$?
+	if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+		echo "    $out"
+	fi
+
+	return $rc
+}
+
+tc_check_packets()
+{
+	local ns=$1; shift
+	local id=$1; shift
+	local handle=$1; shift
+	local count=$1; shift
+	local pkts
+
+	sleep 0.1
+	pkts=$(tc -n $ns -j -s filter show $id \
+		| jq ".[] | select(.options.handle == $handle) | \
+		.options.actions[0].stats.packets")
+	[[ $pkts == $count ]]
+}
+
+################################################################################
+# Setup
+
+setup_common_ns()
+{
+	local ns=$1; shift
+	local local_addr=$1; shift
+
+	ip netns exec $ns sysctl -qw net.ipv4.ip_forward=1
+	ip netns exec $ns sysctl -qw net.ipv4.fib_multipath_use_neigh=1
+	ip netns exec $ns sysctl -qw net.ipv4.conf.default.ignore_routes_with_linkdown=1
+	ip netns exec $ns sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1
+	ip netns exec $ns sysctl -qw net.ipv6.conf.all.forwarding=1
+	ip netns exec $ns sysctl -qw net.ipv6.conf.default.forwarding=1
+	ip netns exec $ns sysctl -qw net.ipv6.conf.default.ignore_routes_with_linkdown=1
+	ip netns exec $ns sysctl -qw net.ipv6.conf.all.accept_dad=0
+	ip netns exec $ns sysctl -qw net.ipv6.conf.default.accept_dad=0
+
+	ip -n $ns link set dev lo up
+	ip -n $ns address add $local_addr dev lo
+
+	ip -n $ns link set dev veth0 up
+
+	ip -n $ns link add name br0 up type bridge vlan_filtering 1 \
+		vlan_default_pvid 0 mcast_snooping 0
+
+	ip -n $ns link add link br0 name br0.10 up type vlan id 10
+	bridge -n $ns vlan add vid 10 dev br0 self
+
+	ip -n $ns link add link br0 name br0.20 up type vlan id 20
+	bridge -n $ns vlan add vid 20 dev br0 self
+
+	ip -n $ns link add link br0 name br0.4000 up type vlan id 4000
+	bridge -n $ns vlan add vid 4000 dev br0 self
+
+	ip -n $ns link add name vx0 up master br0 type vxlan \
+		local $local_addr dstport 4789 external vnifilter
+	bridge -n $ns link set dev vx0 vlan_tunnel on
+
+	bridge -n $ns vlan add vid 10 dev vx0
+	bridge -n $ns vlan add vid 10 dev vx0 tunnel_info id 10010
+	bridge -n $ns vni add vni 10010 dev vx0
+
+	bridge -n $ns vlan add vid 20 dev vx0
+	bridge -n $ns vlan add vid 20 dev vx0 tunnel_info id 10020
+	bridge -n $ns vni add vni 10020 dev vx0
+
+	bridge -n $ns vlan add vid 4000 dev vx0 pvid
+	bridge -n $ns vlan add vid 4000 dev vx0 tunnel_info id 14000
+	bridge -n $ns vni add vni 14000 dev vx0
+}
+
+setup_common()
+{
+	local ns1=$1; shift
+	local ns2=$1; shift
+	local local_addr1=$1; shift
+	local local_addr2=$1; shift
+
+	ip link add name veth0 type veth peer name veth1
+	ip link set dev veth0 netns $ns1 name veth0
+	ip link set dev veth1 netns $ns2 name veth0
+
+	setup_common_ns $ns1 $local_addr1
+	setup_common_ns $ns2 $local_addr2
+}
+
+setup_v4()
+{
+	setup_ns ns1_v4 ns2_v4
+	setup_common $ns1_v4 $ns2_v4 192.0.2.1 192.0.2.2
+
+	ip -n $ns1_v4 address add 192.0.2.17/28 dev veth0
+	ip -n $ns2_v4 address add 192.0.2.18/28 dev veth0
+
+	ip -n $ns1_v4 route add default via 192.0.2.18
+	ip -n $ns2_v4 route add default via 192.0.2.17
+}
+
+cleanup_v4()
+{
+	cleanup_ns $ns2_v4 $ns1_v4
+}
+
+setup_v6()
+{
+	setup_ns ns1_v6 ns2_v6
+	setup_common $ns1_v6 $ns2_v6 2001:db8:1::1 2001:db8:1::2
+
+	ip -n $ns1_v6 address add 2001:db8:2::1/64 dev veth0 nodad
+	ip -n $ns2_v6 address add 2001:db8:2::2/64 dev veth0 nodad
+
+	ip -n $ns1_v6 route add default via 2001:db8:2::2
+	ip -n $ns2_v6 route add default via 2001:db8:2::1
+}
+
+cleanup_v6()
+{
+	cleanup_ns $ns2_v6 $ns1_v6
+}
+
+setup()
+{
+	set -e
+
+	setup_v4
+	setup_v6
+
+	sleep 5
+
+	set +e
+}
+
+cleanup()
+{
+	cleanup_v6 &> /dev/null
+	cleanup_v4 &> /dev/null
+}
+
+################################################################################
+# Tests - Control path
+
+basic_common()
+{
+	local ns1=$1; shift
+	local grp_key=$1; shift
+	local vtep_ip=$1; shift
+
+	# Test basic control path operations common to all MDB entry types.
+
+	# Basic add, replace and delete behavior.
+	run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010"
+	log_test $? 0 "MDB entry addition"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010"
+	log_test $? 0 "MDB entry presence after addition"
+
+	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010"
+	log_test $? 0 "MDB entry replacement"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010"
+	log_test $? 0 "MDB entry presence after replacement"
+
+	run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010"
+	log_test $? 0 "MDB entry deletion"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010"
+	log_test $? 254 "MDB entry presence after deletion"
+
+	run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010"
+	log_test $? 255 "Non-existent MDB entry deletion"
+
+	# Default protocol and replacement.
+	run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \"proto static\""
+	log_test $? 0 "MDB entry default protocol"
+
+	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent proto 123 dst $vtep_ip src_vni 10010"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \"proto 123\""
+	log_test $? 0 "MDB entry protocol replacement"
+
+	run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010"
+
+	# Default destination port and replacement.
+	run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \" dst_port \""
+	log_test $? 1 "MDB entry default destination port"
+
+	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent dst $vtep_ip dst_port 1234 src_vni 10010"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \"dst_port 1234\""
+	log_test $? 0 "MDB entry destination port replacement"
+
+	run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010"
+
+	# Default destination VNI and replacement.
+	run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \" vni \""
+	log_test $? 1 "MDB entry default destination VNI"
+
+	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent dst $vtep_ip vni 1234 src_vni 10010"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \"vni 1234\""
+	log_test $? 0 "MDB entry destination VNI replacement"
+
+	run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010"
+
+	# Default outgoing interface and replacement.
+	run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \" via \""
+	log_test $? 1 "MDB entry default outgoing interface"
+
+	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010 via veth0"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \"via veth0\""
+	log_test $? 0 "MDB entry outgoing interface replacement"
+
+	run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010"
+
+	# Common error cases.
+	run_cmd "bridge -n $ns1 mdb add dev vx0 port veth0 $grp_key permanent dst $vtep_ip src_vni 10010"
+	log_test $? 255 "MDB entry with mismatch between device and port"
+
+	run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key temp dst $vtep_ip src_vni 10010"
+	log_test $? 255 "MDB entry with temp state"
+
+	run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent vid 10 dst $vtep_ip src_vni 10010"
+	log_test $? 255 "MDB entry with VLAN"
+
+	run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp 01:02:03:04:05:06 permanent dst $vtep_ip src_vni 10010"
+	log_test $? 255 "MDB entry MAC address"
+
+	run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent"
+	log_test $? 255 "MDB entry without extended parameters"
+
+	run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent proto 3 dst $vtep_ip src_vni 10010"
+	log_test $? 255 "MDB entry with an invalid protocol"
+
+	run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip vni $((2 ** 24)) src_vni 10010"
+	log_test $? 255 "MDB entry with an invalid destination VNI"
+
+	run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni $((2 ** 24))"
+	log_test $? 255 "MDB entry with an invalid source VNI"
+
+	run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent src_vni 10010"
+	log_test $? 255 "MDB entry without a remote destination IP"
+
+	run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010"
+	run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010"
+	log_test $? 255 "Duplicate MDB entries"
+	run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010"
+}
+
+basic_star_g_ipv4_ipv4()
+{
+	local ns1=$ns1_v4
+	local grp_key="grp 239.1.1.1"
+	local vtep_ip=198.51.100.100
+
+	echo
+	echo "Control path: Basic (*, G) operations - IPv4 overlay / IPv4 underlay"
+	echo "--------------------------------------------------------------------"
+
+	basic_common $ns1 "$grp_key" $vtep_ip
+}
+
+basic_star_g_ipv6_ipv4()
+{
+	local ns1=$ns1_v4
+	local grp_key="grp ff0e::1"
+	local vtep_ip=198.51.100.100
+
+	echo
+	echo "Control path: Basic (*, G) operations - IPv6 overlay / IPv4 underlay"
+	echo "--------------------------------------------------------------------"
+
+	basic_common $ns1 "$grp_key" $vtep_ip
+}
+
+basic_star_g_ipv4_ipv6()
+{
+	local ns1=$ns1_v6
+	local grp_key="grp 239.1.1.1"
+	local vtep_ip=2001:db8:1000::1
+
+	echo
+	echo "Control path: Basic (*, G) operations - IPv4 overlay / IPv6 underlay"
+	echo "--------------------------------------------------------------------"
+
+	basic_common $ns1 "$grp_key" $vtep_ip
+}
+
+basic_star_g_ipv6_ipv6()
+{
+	local ns1=$ns1_v6
+	local grp_key="grp ff0e::1"
+	local vtep_ip=2001:db8:1000::1
+
+	echo
+	echo "Control path: Basic (*, G) operations - IPv6 overlay / IPv6 underlay"
+	echo "--------------------------------------------------------------------"
+
+	basic_common $ns1 "$grp_key" $vtep_ip
+}
+
+basic_sg_ipv4_ipv4()
+{
+	local ns1=$ns1_v4
+	local grp_key="grp 239.1.1.1 src 192.0.2.129"
+	local vtep_ip=198.51.100.100
+
+	echo
+	echo "Control path: Basic (S, G) operations - IPv4 overlay / IPv4 underlay"
+	echo "--------------------------------------------------------------------"
+
+	basic_common $ns1 "$grp_key" $vtep_ip
+}
+
+basic_sg_ipv6_ipv4()
+{
+	local ns1=$ns1_v4
+	local grp_key="grp ff0e::1 src 2001:db8:100::1"
+	local vtep_ip=198.51.100.100
+
+	echo
+	echo "Control path: Basic (S, G) operations - IPv6 overlay / IPv4 underlay"
+	echo "---------------------------------------------------------------------"
+
+	basic_common $ns1 "$grp_key" $vtep_ip
+}
+
+basic_sg_ipv4_ipv6()
+{
+	local ns1=$ns1_v6
+	local grp_key="grp 239.1.1.1 src 192.0.2.129"
+	local vtep_ip=2001:db8:1000::1
+
+	echo
+	echo "Control path: Basic (S, G) operations - IPv4 overlay / IPv6 underlay"
+	echo "--------------------------------------------------------------------"
+
+	basic_common $ns1 "$grp_key" $vtep_ip
+}
+
+basic_sg_ipv6_ipv6()
+{
+	local ns1=$ns1_v6
+	local grp_key="grp ff0e::1 src 2001:db8:100::1"
+	local vtep_ip=2001:db8:1000::1
+
+	echo
+	echo "Control path: Basic (S, G) operations - IPv6 overlay / IPv6 underlay"
+	echo "--------------------------------------------------------------------"
+
+	basic_common $ns1 "$grp_key" $vtep_ip
+}
+
+star_g_common()
+{
+	local ns1=$1; shift
+	local grp=$1; shift
+	local src1=$1; shift
+	local src2=$1; shift
+	local src3=$1; shift
+	local vtep_ip=$1; shift
+	local all_zeros_grp=$1; shift
+
+	# Test control path operations specific to (*, G) entries.
+
+	# Basic add, replace and delete behavior.
+	run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010"
+	log_test $? 0 "(*, G) MDB entry addition with source list"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010"
+	log_test $? 0 "(*, G) MDB entry presence after addition"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010"
+	log_test $? 0 "(S, G) MDB entry presence after addition"
+
+	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010"
+	log_test $? 0 "(*, G) MDB entry replacement with source list"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010"
+	log_test $? 0 "(*, G) MDB entry presence after replacement"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010"
+	log_test $? 0 "(S, G) MDB entry presence after replacement"
+
+	run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010"
+	log_test $? 0 "(*, G) MDB entry deletion"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010"
+	log_test $? 254 "(*, G) MDB entry presence after deletion"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010"
+	log_test $? 254 "(S, G) MDB entry presence after deletion"
+
+	# Default filter mode and replacement.
+	run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent dst $vtep_ip src_vni 10010"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep exclude"
+	log_test $? 0 "(*, G) MDB entry default filter mode"
+
+	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $src1 dst $vtep_ip src_vni 10010"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep include"
+	log_test $? 0 "(*, G) MDB entry after replacing filter mode to \"include\""
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010"
+	log_test $? 0 "(S, G) MDB entry after replacing filter mode to \"include\""
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep blocked"
+	log_test $? 1 "\"blocked\" flag after replacing filter mode to \"include\""
+
+	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep exclude"
+	log_test $? 0 "(*, G) MDB entry after replacing filter mode to \"exclude\""
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grep grp $grp src $src1 src_vni 10010"
+	log_test $? 0 "(S, G) MDB entry after replacing filter mode to \"exclude\""
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep blocked"
+	log_test $? 0 "\"blocked\" flag after replacing filter mode to \"exclude\""
+
+	run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010"
+
+	# Default source list and replacement.
+	run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent dst $vtep_ip src_vni 10010"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep source_list"
+	log_test $? 1 "(*, G) MDB entry default source list"
+
+	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1,$src2,$src3 dst $vtep_ip src_vni 10010"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010"
+	log_test $? 0 "(S, G) MDB entry of 1st source after replacing source list"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src2 src_vni 10010"
+	log_test $? 0 "(S, G) MDB entry of 2nd source after replacing source list"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src3 src_vni 10010"
+	log_test $? 0 "(S, G) MDB entry of 3rd source after replacing source list"
+
+	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1,$src3 dst $vtep_ip src_vni 10010"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010"
+	log_test $? 0 "(S, G) MDB entry of 1st source after removing source"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src2 src_vni 10010"
+	log_test $? 254 "(S, G) MDB entry of 2nd source after removing source"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src3 src_vni 10010"
+	log_test $? 0 "(S, G) MDB entry of 3rd source after removing source"
+
+	run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010"
+
+	# Default protocol and replacement.
+	run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \"proto static\""
+	log_test $? 0 "(*, G) MDB entry default protocol"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \"proto static\""
+	log_test $? 0 "(S, G) MDB entry default protocol"
+
+	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 proto bgp dst $vtep_ip src_vni 10010"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \"proto bgp\""
+	log_test $? 0 "(*, G) MDB entry protocol after replacement"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \"proto bgp\""
+	log_test $? 0 "(S, G) MDB entry protocol after replacement"
+
+	run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010"
+
+	# Default destination port and replacement.
+	run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \" dst_port \""
+	log_test $? 1 "(*, G) MDB entry default destination port"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \" dst_port \""
+	log_test $? 1 "(S, G) MDB entry default destination port"
+
+	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip dst_port 1234 src_vni 10010"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \" dst_port 1234 \""
+	log_test $? 0 "(*, G) MDB entry destination port after replacement"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \" dst_port 1234 \""
+	log_test $? 0 "(S, G) MDB entry destination port after replacement"
+
+	run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010"
+
+	# Default destination VNI and replacement.
+	run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \" vni \""
+	log_test $? 1 "(*, G) MDB entry default destination VNI"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \" vni \""
+	log_test $? 1 "(S, G) MDB entry default destination VNI"
+
+	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip vni 1234 src_vni 10010"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \" vni 1234 \""
+	log_test $? 0 "(*, G) MDB entry destination VNI after replacement"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \" vni 1234 \""
+	log_test $? 0 "(S, G) MDB entry destination VNI after replacement"
+
+	run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010"
+
+	# Default outgoing interface and replacement.
+	run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \" via \""
+	log_test $? 1 "(*, G) MDB entry default outgoing interface"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \" via \""
+	log_test $? 1 "(S, G) MDB entry default outgoing interface"
+
+	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010 via veth0"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \" via veth0 \""
+	log_test $? 0 "(*, G) MDB entry outgoing interface after replacement"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \" via veth0 \""
+	log_test $? 0 "(S, G) MDB entry outgoing interface after replacement"
+
+	run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010"
+
+	# Error cases.
+	run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $all_zeros_grp permanent filter_mode exclude dst $vtep_ip src_vni 10010"
+	log_test $? 255 "All-zeros group with filter mode"
+
+	run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $all_zeros_grp permanent source_list $src1 dst $vtep_ip src_vni 10010"
+	log_test $? 255 "All-zeros group with source list"
+
+	run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode include dst $vtep_ip src_vni 10010"
+	log_test $? 255 "(*, G) INCLUDE with an empty source list"
+
+	run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $grp dst $vtep_ip src_vni 10010"
+	log_test $? 255 "Invalid source in source list"
+
+	run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent source_list $src1 dst $vtep_ip src_vni 10010"
+	log_test $? 255 "Source list without filter mode"
+}
+
+star_g_ipv4_ipv4()
+{
+	local ns1=$ns1_v4
+	local grp=239.1.1.1
+	local src1=192.0.2.129
+	local src2=192.0.2.130
+	local src3=192.0.2.131
+	local vtep_ip=198.51.100.100
+	local all_zeros_grp=0.0.0.0
+
+	echo
+	echo "Control path: (*, G) operations - IPv4 overlay / IPv4 underlay"
+	echo "--------------------------------------------------------------"
+
+	star_g_common $ns1 $grp $src1 $src2 $src3 $vtep_ip $all_zeros_grp
+}
+
+star_g_ipv6_ipv4()
+{
+	local ns1=$ns1_v4
+	local grp=ff0e::1
+	local src1=2001:db8:100::1
+	local src2=2001:db8:100::2
+	local src3=2001:db8:100::3
+	local vtep_ip=198.51.100.100
+	local all_zeros_grp=::
+
+	echo
+	echo "Control path: (*, G) operations - IPv6 overlay / IPv4 underlay"
+	echo "--------------------------------------------------------------"
+
+	star_g_common $ns1 $grp $src1 $src2 $src3 $vtep_ip $all_zeros_grp
+}
+
+star_g_ipv4_ipv6()
+{
+	local ns1=$ns1_v6
+	local grp=239.1.1.1
+	local src1=192.0.2.129
+	local src2=192.0.2.130
+	local src3=192.0.2.131
+	local vtep_ip=2001:db8:1000::1
+	local all_zeros_grp=0.0.0.0
+
+	echo
+	echo "Control path: (*, G) operations - IPv4 overlay / IPv6 underlay"
+	echo "--------------------------------------------------------------"
+
+	star_g_common $ns1 $grp $src1 $src2 $src3 $vtep_ip $all_zeros_grp
+}
+
+star_g_ipv6_ipv6()
+{
+	local ns1=$ns1_v6
+	local grp=ff0e::1
+	local src1=2001:db8:100::1
+	local src2=2001:db8:100::2
+	local src3=2001:db8:100::3
+	local vtep_ip=2001:db8:1000::1
+	local all_zeros_grp=::
+
+	echo
+	echo "Control path: (*, G) operations - IPv6 overlay / IPv6 underlay"
+	echo "--------------------------------------------------------------"
+
+	star_g_common $ns1 $grp $src1 $src2 $src3 $vtep_ip $all_zeros_grp
+}
+
+sg_common()
+{
+	local ns1=$1; shift
+	local grp=$1; shift
+	local src=$1; shift
+	local vtep_ip=$1; shift
+	local all_zeros_grp=$1; shift
+
+	# Test control path operations specific to (S, G) entries.
+
+	# Default filter mode.
+	run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp src $src permanent dst $vtep_ip src_vni 10010"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src src_vni 10010 | grep include"
+	log_test $? 0 "(S, G) MDB entry default filter mode"
+
+	run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp src $src permanent dst $vtep_ip src_vni 10010"
+
+	# Error cases.
+	run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp src $src permanent filter_mode include dst $vtep_ip src_vni 10010"
+	log_test $? 255 "(S, G) with filter mode"
+
+	run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp src $src permanent source_list $src dst $vtep_ip src_vni 10010"
+	log_test $? 255 "(S, G) with source list"
+
+	run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp src $grp permanent dst $vtep_ip src_vni 10010"
+	log_test $? 255 "(S, G) with an invalid source list"
+
+	run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $all_zeros_grp src $src permanent dst $vtep_ip src_vni 10010"
+	log_test $? 255 "All-zeros group with source"
+}
+
+sg_ipv4_ipv4()
+{
+	local ns1=$ns1_v4
+	local grp=239.1.1.1
+	local src=192.0.2.129
+	local vtep_ip=198.51.100.100
+	local all_zeros_grp=0.0.0.0
+
+	echo
+	echo "Control path: (S, G) operations - IPv4 overlay / IPv4 underlay"
+	echo "--------------------------------------------------------------"
+
+	sg_common $ns1 $grp $src $vtep_ip $all_zeros_grp
+}
+
+sg_ipv6_ipv4()
+{
+	local ns1=$ns1_v4
+	local grp=ff0e::1
+	local src=2001:db8:100::1
+	local vtep_ip=198.51.100.100
+	local all_zeros_grp=::
+
+	echo
+	echo "Control path: (S, G) operations - IPv6 overlay / IPv4 underlay"
+	echo "--------------------------------------------------------------"
+
+	sg_common $ns1 $grp $src $vtep_ip $all_zeros_grp
+}
+
+sg_ipv4_ipv6()
+{
+	local ns1=$ns1_v6
+	local grp=239.1.1.1
+	local src=192.0.2.129
+	local vtep_ip=2001:db8:1000::1
+	local all_zeros_grp=0.0.0.0
+
+	echo
+	echo "Control path: (S, G) operations - IPv4 overlay / IPv6 underlay"
+	echo "--------------------------------------------------------------"
+
+	sg_common $ns1 $grp $src $vtep_ip $all_zeros_grp
+}
+
+sg_ipv6_ipv6()
+{
+	local ns1=$ns1_v6
+	local grp=ff0e::1
+	local src=2001:db8:100::1
+	local vtep_ip=2001:db8:1000::1
+	local all_zeros_grp=::
+
+	echo
+	echo "Control path: (S, G) operations - IPv6 overlay / IPv6 underlay"
+	echo "--------------------------------------------------------------"
+
+	sg_common $ns1 $grp $src $vtep_ip $all_zeros_grp
+}
+
+ipv4_grps_get()
+{
+	local max_grps=$1; shift
+	local i
+
+	for i in $(seq 0 $((max_grps - 1))); do
+		echo "239.1.1.$i"
+	done
+}
+
+ipv6_grps_get()
+{
+	local max_grps=$1; shift
+	local i
+
+	for i in $(seq 0 $((max_grps - 1))); do
+		echo "ff0e::$(printf %x $i)"
+	done
+}
+
+dump_common()
+{
+	local ns1=$1; shift
+	local local_addr=$1; shift
+	local remote_prefix=$1; shift
+	local fn=$1; shift
+	local max_vxlan_devs=2
+	local max_remotes=64
+	local max_grps=256
+	local num_entries
+	local batch_file
+	local grp
+	local i j
+
+	# The kernel maintains various markers for the MDB dump. Add a test for
+	# large scale MDB dump to make sure that all the configured entries are
+	# dumped and that the markers are used correctly.
+
+	# Create net devices.
+	for i in $(seq 1 $max_vxlan_devs); do
+		ip -n $ns1 link add name vx-test${i} up type vxlan \
+			local $local_addr dstport 4789 external vnifilter
+	done
+
+	# Create batch file with MDB entries.
+	batch_file=$(mktemp)
+	for i in $(seq 1 $max_vxlan_devs); do
+		for j in $(seq 1 $max_remotes); do
+			for grp in $($fn $max_grps); do
+				echo "mdb add dev vx-test${i} port vx-test${i} grp $grp permanent dst ${remote_prefix}${j}" >> $batch_file
+			done
+		done
+	done
+
+	# Program the batch file and check for expected number of entries.
+	bridge -n $ns1 -b $batch_file
+	for i in $(seq 1 $max_vxlan_devs); do
+		num_entries=$(bridge -n $ns1 mdb show dev vx-test${i} | grep "permanent" | wc -l)
+		[[ $num_entries -eq $((max_grps * max_remotes)) ]]
+		log_test $? 0 "Large scale dump - VXLAN device #$i"
+	done
+
+	rm -rf $batch_file
+}
+
+dump_ipv4_ipv4()
+{
+	local ns1=$ns1_v4
+	local local_addr=192.0.2.1
+	local remote_prefix=198.51.100.
+	local fn=ipv4_grps_get
+
+	echo
+	echo "Control path: Large scale MDB dump - IPv4 overlay / IPv4 underlay"
+	echo "-----------------------------------------------------------------"
+
+	dump_common $ns1 $local_addr $remote_prefix $fn
+}
+
+dump_ipv6_ipv4()
+{
+	local ns1=$ns1_v4
+	local local_addr=192.0.2.1
+	local remote_prefix=198.51.100.
+	local fn=ipv6_grps_get
+
+	echo
+	echo "Control path: Large scale MDB dump - IPv6 overlay / IPv4 underlay"
+	echo "-----------------------------------------------------------------"
+
+	dump_common $ns1 $local_addr $remote_prefix $fn
+}
+
+dump_ipv4_ipv6()
+{
+	local ns1=$ns1_v6
+	local local_addr=2001:db8:1::1
+	local remote_prefix=2001:db8:1000::
+	local fn=ipv4_grps_get
+
+	echo
+	echo "Control path: Large scale MDB dump - IPv4 overlay / IPv6 underlay"
+	echo "-----------------------------------------------------------------"
+
+	dump_common $ns1 $local_addr $remote_prefix $fn
+}
+
+dump_ipv6_ipv6()
+{
+	local ns1=$ns1_v6
+	local local_addr=2001:db8:1::1
+	local remote_prefix=2001:db8:1000::
+	local fn=ipv6_grps_get
+
+	echo
+	echo "Control path: Large scale MDB dump - IPv6 overlay / IPv6 underlay"
+	echo "-----------------------------------------------------------------"
+
+	dump_common $ns1 $local_addr $remote_prefix $fn
+}
+
+flush()
+{
+	local num_entries
+
+	echo
+	echo "Control path: Flush"
+	echo "-------------------"
+
+	# Add entries with different attributes and check that they are all
+	# flushed when the flush command is given with no parameters.
+
+	# Different source VNI.
+	run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010"
+	run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.2 permanent dst 198.51.100.1 src_vni 10011"
+
+	# Different routing protocol.
+	run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.3 permanent proto bgp dst 198.51.100.1 src_vni 10010"
+	run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.4 permanent proto zebra dst 198.51.100.1 src_vni 10010"
+
+	# Different destination IP.
+	run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.5 permanent dst 198.51.100.1 src_vni 10010"
+	run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.6 permanent dst 198.51.100.2 src_vni 10010"
+
+	# Different destination port.
+	run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.7 permanent dst 198.51.100.1 dst_port 11111 src_vni 10010"
+	run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.8 permanent dst 198.51.100.1 dst_port 22222 src_vni 10010"
+
+	# Different VNI.
+	run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.9 permanent dst 198.51.100.1 vni 10010 src_vni 10010"
+	run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.10 permanent dst 198.51.100.1 vni 10020 src_vni 10010"
+
+	run_cmd "bridge -n $ns1_v4 mdb flush dev vx0"
+	num_entries=$(bridge -n $ns1_v4 mdb show dev vx0 | wc -l)
+	[[ $num_entries -eq 0 ]]
+	log_test $? 0 "Flush all"
+
+	# Check that entries are flushed when port is specified as the VXLAN
+	# device and that an error is returned when port is specified as a
+	# different net device.
+
+	run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010"
+	run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.2 src_vni 10010"
+
+	run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 port vx0"
+	run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010"
+	log_test $? 254 "Flush by port - matching"
+
+	run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 port veth0"
+	log_test $? 255 "Flush by port - non-matching"
+
+	# Check that when flushing by source VNI only entries programmed with
+	# the specified source VNI are flushed and the rest are not.
+
+	run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010"
+	run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.2 src_vni 10010"
+	run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10011"
+	run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.2 src_vni 10011"
+
+	run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 src_vni 10010"
+
+	run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010"
+	log_test $? 254 "Flush by source VNI - matching"
+	run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10011"
+	log_test $? 0 "Flush by source VNI - non-matching"
+
+	run_cmd "bridge -n $ns1_v4 mdb flush dev vx0"
+
+	# Check that all entries are flushed when "permanent" is specified and
+	# that an error is returned when "nopermanent" is specified.
+
+	run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010"
+	run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.2 src_vni 10010"
+
+	run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 permanent"
+	run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010"
+	log_test $? 254 "Flush by \"permanent\" state"
+
+	run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 nopermanent"
+	log_test $? 255 "Flush by \"nopermanent\" state"
+
+	# Check that when flushing by routing protocol only entries programmed
+	# with the specified routing protocol are flushed and the rest are not.
+
+	run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent proto bgp dst 198.51.100.1 src_vni 10010"
+	run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent proto zebra dst 198.51.100.2 src_vni 10010"
+
+	run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 proto bgp"
+
+	run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \"proto bgp\""
+	log_test $? 1 "Flush by routing protocol - matching"
+	run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \"proto zebra\""
+	log_test $? 0 "Flush by routing protocol - non-matching"
+
+	run_cmd "bridge -n $ns1_v4 mdb flush dev vx0"
+
+	# Check that when flushing by destination IP only entries programmed
+	# with the specified destination IP are flushed and the rest are not.
+
+	# IPv4.
+
+	run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010"
+	run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.2 src_vni 10010"
+
+	run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 dst 198.51.100.2"
+
+	run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.2"
+	log_test $? 1 "Flush by IPv4 destination IP - matching"
+	run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.1"
+	log_test $? 0 "Flush by IPv4 destination IP - non-matching"
+
+	run_cmd "bridge -n $ns1_v4 mdb flush dev vx0"
+
+	# IPv6.
+
+	run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 2001:db8:1000::1 src_vni 10010"
+	run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 2001:db8:1000::2 src_vni 10010"
+
+	run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 dst 2001:db8:1000::2"
+
+	run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 2001:db8:1000::2"
+	log_test $? 1 "Flush by IPv6 destination IP - matching"
+	run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 2001:db8:1000::1"
+	log_test $? 0 "Flush by IPv6 destination IP - non-matching"
+
+	run_cmd "bridge -n $ns1_v4 mdb flush dev vx0"
+
+	# Check that when flushing by UDP destination port only entries
+	# programmed with the specified port are flushed and the rest are not.
+
+	run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst_port 11111 dst 198.51.100.1 src_vni 10010"
+	run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst_port 22222 dst 198.51.100.2 src_vni 10010"
+
+	run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 dst_port 11111"
+
+	run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \"dst_port 11111\""
+	log_test $? 1 "Flush by UDP destination port - matching"
+	run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \"dst_port 22222\""
+	log_test $? 0 "Flush by UDP destination port - non-matching"
+
+	run_cmd "bridge -n $ns1_v4 mdb flush dev vx0"
+
+	# When not specifying a UDP destination port for an entry, traffic is
+	# encapsulated with the device's UDP destination port. Check that when
+	# flushing by the device's UDP destination port only entries programmed
+	# with this port are flushed and the rest are not.
+
+	run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010"
+	run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst_port 22222 dst 198.51.100.2 src_vni 10010"
+
+	run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 dst_port 4789"
+
+	run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.1"
+	log_test $? 1 "Flush by device's UDP destination port - matching"
+	run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.2"
+	log_test $? 0 "Flush by device's UDP destination port - non-matching"
+
+	run_cmd "bridge -n $ns1_v4 mdb flush dev vx0"
+
+	# Check that when flushing by destination VNI only entries programmed
+	# with the specified destination VNI are flushed and the rest are not.
+
+	run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent vni 20010 dst 198.51.100.1 src_vni 10010"
+	run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent vni 20011 dst 198.51.100.2 src_vni 10010"
+
+	run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 vni 20010"
+
+	run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \" vni 20010\""
+	log_test $? 1 "Flush by destination VNI - matching"
+	run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \" vni 20011\""
+	log_test $? 0 "Flush by destination VNI - non-matching"
+
+	run_cmd "bridge -n $ns1_v4 mdb flush dev vx0"
+
+	# When not specifying a destination VNI for an entry, traffic is
+	# encapsulated with the source VNI. Check that when flushing by a
+	# destination VNI that is equal to the source VNI only such entries are
+	# flushed and the rest are not.
+
+	run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010"
+	run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent vni 20010 dst 198.51.100.2 src_vni 10010"
+
+	run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 vni 10010"
+
+	run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.1"
+	log_test $? 1 "Flush by destination VNI equal to source VNI - matching"
+	run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.2"
+	log_test $? 0 "Flush by destination VNI equal to source VNI - non-matching"
+
+	run_cmd "bridge -n $ns1_v4 mdb flush dev vx0"
+
+	# Test that an error is returned when trying to flush using VLAN ID.
+
+	run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 vid 10"
+	log_test $? 255 "Flush by VLAN ID"
+}
+
+################################################################################
+# Tests - Data path
+
+encap_params_common()
+{
+	local ns1=$1; shift
+	local ns2=$1; shift
+	local vtep1_ip=$1; shift
+	local vtep2_ip=$1; shift
+	local plen=$1; shift
+	local enc_ethtype=$1; shift
+	local grp=$1; shift
+	local src=$1; shift
+	local mz=$1; shift
+
+	# Test that packets forwarded by the VXLAN MDB are encapsulated with
+	# the correct parameters. Transmit packets from the first namespace and
+	# check that they hit the corresponding filters on the ingress of the
+	# second namespace.
+
+	run_cmd "tc -n $ns2 qdisc replace dev veth0 clsact"
+	run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact"
+	run_cmd "ip -n $ns2 address replace $vtep1_ip/$plen dev lo"
+	run_cmd "ip -n $ns2 address replace $vtep2_ip/$plen dev lo"
+
+	# Check destination IP.
+	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10010"
+	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep2_ip src_vni 10020"
+
+	run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass"
+	run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+	tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+	log_test $? 0 "Destination IP - match"
+
+	run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+	tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+	log_test $? 0 "Destination IP - no match"
+
+	run_cmd "tc -n $ns2 filter del dev vx0 ingress pref 1 handle 101 flower"
+	run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep2_ip src_vni 10020"
+	run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10010"
+
+	# Check destination port.
+	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10010"
+	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip dst_port 1111 src_vni 10020"
+
+	run_cmd "tc -n $ns2 filter replace dev veth0 ingress pref 1 handle 101 proto $enc_ethtype flower ip_proto udp dst_port 4789 action pass"
+	run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+	tc_check_packets "$ns2" "dev veth0 ingress" 101 1
+	log_test $? 0 "Default destination port - match"
+
+	run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+	tc_check_packets "$ns2" "dev veth0 ingress" 101 1
+	log_test $? 0 "Default destination port - no match"
+
+	run_cmd "tc -n $ns2 filter replace dev veth0 ingress pref 1 handle 101 proto $enc_ethtype flower ip_proto udp dst_port 1111 action pass"
+	run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+	tc_check_packets "$ns2" "dev veth0 ingress" 101 1
+	log_test $? 0 "Non-default destination port - match"
+
+	run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+	tc_check_packets "$ns2" "dev veth0 ingress" 101 1
+	log_test $? 0 "Non-default destination port - no match"
+
+	run_cmd "tc -n $ns2 filter del dev veth0 ingress pref 1 handle 101 flower"
+	run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10020"
+	run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10010"
+
+	# Check default VNI.
+	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10010"
+	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10020"
+
+	run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_key_id 10010 action pass"
+	run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+	tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+	log_test $? 0 "Default destination VNI - match"
+
+	run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+	tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+	log_test $? 0 "Default destination VNI - no match"
+
+	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip vni 10020 src_vni 10010"
+	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip vni 10010 src_vni 10020"
+
+	run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_key_id 10020 action pass"
+	run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+	tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+	log_test $? 0 "Non-default destination VNI - match"
+
+	run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+	tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+	log_test $? 0 "Non-default destination VNI - no match"
+
+	run_cmd "tc -n $ns2 filter del dev vx0 ingress pref 1 handle 101 flower"
+	run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10020"
+	run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10010"
+}
+
+encap_params_ipv4_ipv4()
+{
+	local ns1=$ns1_v4
+	local ns2=$ns2_v4
+	local vtep1_ip=198.51.100.100
+	local vtep2_ip=198.51.100.200
+	local plen=32
+	local enc_ethtype="ip"
+	local grp=239.1.1.1
+	local src=192.0.2.129
+
+	echo
+	echo "Data path: Encapsulation parameters - IPv4 overlay / IPv4 underlay"
+	echo "------------------------------------------------------------------"
+
+	encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \
+		$grp $src "mausezahn"
+}
+
+encap_params_ipv6_ipv4()
+{
+	local ns1=$ns1_v4
+	local ns2=$ns2_v4
+	local vtep1_ip=198.51.100.100
+	local vtep2_ip=198.51.100.200
+	local plen=32
+	local enc_ethtype="ip"
+	local grp=ff0e::1
+	local src=2001:db8:100::1
+
+	echo
+	echo "Data path: Encapsulation parameters - IPv6 overlay / IPv4 underlay"
+	echo "------------------------------------------------------------------"
+
+	encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \
+		$grp $src "mausezahn -6"
+}
+
+encap_params_ipv4_ipv6()
+{
+	local ns1=$ns1_v6
+	local ns2=$ns2_v6
+	local vtep1_ip=2001:db8:1000::1
+	local vtep2_ip=2001:db8:2000::1
+	local plen=128
+	local enc_ethtype="ipv6"
+	local grp=239.1.1.1
+	local src=192.0.2.129
+
+	echo
+	echo "Data path: Encapsulation parameters - IPv4 overlay / IPv6 underlay"
+	echo "------------------------------------------------------------------"
+
+	encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \
+		$grp $src "mausezahn"
+}
+
+encap_params_ipv6_ipv6()
+{
+	local ns1=$ns1_v6
+	local ns2=$ns2_v6
+	local vtep1_ip=2001:db8:1000::1
+	local vtep2_ip=2001:db8:2000::1
+	local plen=128
+	local enc_ethtype="ipv6"
+	local grp=ff0e::1
+	local src=2001:db8:100::1
+
+	echo
+	echo "Data path: Encapsulation parameters - IPv6 overlay / IPv6 underlay"
+	echo "------------------------------------------------------------------"
+
+	encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \
+		$grp $src "mausezahn -6"
+}
+
+starg_exclude_ir_common()
+{
+	local ns1=$1; shift
+	local ns2=$1; shift
+	local vtep1_ip=$1; shift
+	local vtep2_ip=$1; shift
+	local plen=$1; shift
+	local grp=$1; shift
+	local valid_src=$1; shift
+	local invalid_src=$1; shift
+	local mz=$1; shift
+
+	# Install a (*, G) EXCLUDE MDB entry with one source and two remote
+	# VTEPs. Make sure that the source in the source list is not forwarded
+	# and that a source not in the list is forwarded. Remove one of the
+	# VTEPs from the entry and make sure that packets are only forwarded to
+	# the remaining VTEP.
+
+	run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact"
+	run_cmd "ip -n $ns2 address replace $vtep1_ip/$plen dev lo"
+	run_cmd "ip -n $ns2 address replace $vtep2_ip/$plen dev lo"
+
+	run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass"
+	run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 102 proto all flower enc_dst_ip $vtep2_ip action pass"
+
+	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $invalid_src dst $vtep1_ip src_vni 10010"
+	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $invalid_src dst $vtep2_ip src_vni 10010"
+
+	# Check that invalid source is not forwarded to any VTEP.
+	run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+	tc_check_packets "$ns2" "dev vx0 ingress" 101 0
+	log_test $? 0 "Block excluded source - first VTEP"
+	tc_check_packets "$ns2" "dev vx0 ingress" 102 0
+	log_test $? 0 "Block excluded source - second VTEP"
+
+	# Check that valid source is forwarded to both VTEPs.
+	run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+	tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+	log_test $? 0 "Forward valid source - first VTEP"
+	tc_check_packets "$ns2" "dev vx0 ingress" 102 1
+	log_test $? 0 "Forward valid source - second VTEP"
+
+	# Remove second VTEP.
+	run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep2_ip src_vni 10010"
+
+	# Check that invalid source is not forwarded to any VTEP.
+	run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+	tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+	log_test $? 0 "Block excluded source after removal - first VTEP"
+	tc_check_packets "$ns2" "dev vx0 ingress" 102 1
+	log_test $? 0 "Block excluded source after removal - second VTEP"
+
+	# Check that valid source is forwarded to the remaining VTEP.
+	run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+	tc_check_packets "$ns2" "dev vx0 ingress" 101 2
+	log_test $? 0 "Forward valid source after removal - first VTEP"
+	tc_check_packets "$ns2" "dev vx0 ingress" 102 1
+	log_test $? 0 "Forward valid source after removal - second VTEP"
+}
+
+starg_exclude_ir_ipv4_ipv4()
+{
+	local ns1=$ns1_v4
+	local ns2=$ns2_v4
+	local vtep1_ip=198.51.100.100
+	local vtep2_ip=198.51.100.200
+	local plen=32
+	local grp=239.1.1.1
+	local valid_src=192.0.2.129
+	local invalid_src=192.0.2.145
+
+	echo
+	echo "Data path: (*, G) EXCLUDE - IR - IPv4 overlay / IPv4 underlay"
+	echo "-------------------------------------------------------------"
+
+	starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
+		$valid_src $invalid_src "mausezahn"
+}
+
+starg_exclude_ir_ipv6_ipv4()
+{
+	local ns1=$ns1_v4
+	local ns2=$ns2_v4
+	local vtep1_ip=198.51.100.100
+	local vtep2_ip=198.51.100.200
+	local plen=32
+	local grp=ff0e::1
+	local valid_src=2001:db8:100::1
+	local invalid_src=2001:db8:200::1
+
+	echo
+	echo "Data path: (*, G) EXCLUDE - IR - IPv6 overlay / IPv4 underlay"
+	echo "-------------------------------------------------------------"
+
+	starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
+		$valid_src $invalid_src "mausezahn -6"
+}
+
+starg_exclude_ir_ipv4_ipv6()
+{
+	local ns1=$ns1_v6
+	local ns2=$ns2_v6
+	local vtep1_ip=2001:db8:1000::1
+	local vtep2_ip=2001:db8:2000::1
+	local plen=128
+	local grp=239.1.1.1
+	local valid_src=192.0.2.129
+	local invalid_src=192.0.2.145
+
+	echo
+	echo "Data path: (*, G) EXCLUDE - IR - IPv4 overlay / IPv6 underlay"
+	echo "-------------------------------------------------------------"
+
+	starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
+		$valid_src $invalid_src "mausezahn"
+}
+
+starg_exclude_ir_ipv6_ipv6()
+{
+	local ns1=$ns1_v6
+	local ns2=$ns2_v6
+	local vtep1_ip=2001:db8:1000::1
+	local vtep2_ip=2001:db8:2000::1
+	local plen=128
+	local grp=ff0e::1
+	local valid_src=2001:db8:100::1
+	local invalid_src=2001:db8:200::1
+
+	echo
+	echo "Data path: (*, G) EXCLUDE - IR - IPv6 overlay / IPv6 underlay"
+	echo "-------------------------------------------------------------"
+
+	starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
+		$valid_src $invalid_src "mausezahn -6"
+}
+
+starg_include_ir_common()
+{
+	local ns1=$1; shift
+	local ns2=$1; shift
+	local vtep1_ip=$1; shift
+	local vtep2_ip=$1; shift
+	local plen=$1; shift
+	local grp=$1; shift
+	local valid_src=$1; shift
+	local invalid_src=$1; shift
+	local mz=$1; shift
+
+	# Install a (*, G) INCLUDE MDB entry with one source and two remote
+	# VTEPs. Make sure that the source in the source list is forwarded and
+	# that a source not in the list is not forwarded. Remove one of the
+	# VTEPs from the entry and make sure that packets are only forwarded to
+	# the remaining VTEP.
+
+	run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact"
+	run_cmd "ip -n $ns2 address replace $vtep1_ip/$plen dev lo"
+	run_cmd "ip -n $ns2 address replace $vtep2_ip/$plen dev lo"
+
+	run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass"
+	run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 102 proto all flower enc_dst_ip $vtep2_ip action pass"
+
+	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $valid_src dst $vtep1_ip src_vni 10010"
+	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $valid_src dst $vtep2_ip src_vni 10010"
+
+	# Check that invalid source is not forwarded to any VTEP.
+	run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+	tc_check_packets "$ns2" "dev vx0 ingress" 101 0
+	log_test $? 0 "Block excluded source - first VTEP"
+	tc_check_packets "$ns2" "dev vx0 ingress" 102 0
+	log_test $? 0 "Block excluded source - second VTEP"
+
+	# Check that valid source is forwarded to both VTEPs.
+	run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+	tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+	log_test $? 0 "Forward valid source - first VTEP"
+	tc_check_packets "$ns2" "dev vx0 ingress" 102 1
+	log_test $? 0 "Forward valid source - second VTEP"
+
+	# Remove second VTEP.
+	run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep2_ip src_vni 10010"
+
+	# Check that invalid source is not forwarded to any VTEP.
+	run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+	tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+	log_test $? 0 "Block excluded source after removal - first VTEP"
+	tc_check_packets "$ns2" "dev vx0 ingress" 102 1
+	log_test $? 0 "Block excluded source after removal - second VTEP"
+
+	# Check that valid source is forwarded to the remaining VTEP.
+	run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+	tc_check_packets "$ns2" "dev vx0 ingress" 101 2
+	log_test $? 0 "Forward valid source after removal - first VTEP"
+	tc_check_packets "$ns2" "dev vx0 ingress" 102 1
+	log_test $? 0 "Forward valid source after removal - second VTEP"
+}
+
+starg_include_ir_ipv4_ipv4()
+{
+	local ns1=$ns1_v4
+	local ns2=$ns2_v4
+	local vtep1_ip=198.51.100.100
+	local vtep2_ip=198.51.100.200
+	local plen=32
+	local grp=239.1.1.1
+	local valid_src=192.0.2.129
+	local invalid_src=192.0.2.145
+
+	echo
+	echo "Data path: (*, G) INCLUDE - IR - IPv4 overlay / IPv4 underlay"
+	echo "-------------------------------------------------------------"
+
+	starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
+		$valid_src $invalid_src "mausezahn"
+}
+
+starg_include_ir_ipv6_ipv4()
+{
+	local ns1=$ns1_v4
+	local ns2=$ns2_v4
+	local vtep1_ip=198.51.100.100
+	local vtep2_ip=198.51.100.200
+	local plen=32
+	local grp=ff0e::1
+	local valid_src=2001:db8:100::1
+	local invalid_src=2001:db8:200::1
+
+	echo
+	echo "Data path: (*, G) INCLUDE - IR - IPv6 overlay / IPv4 underlay"
+	echo "-------------------------------------------------------------"
+
+	starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
+		$valid_src $invalid_src "mausezahn -6"
+}
+
+starg_include_ir_ipv4_ipv6()
+{
+	local ns1=$ns1_v6
+	local ns2=$ns2_v6
+	local vtep1_ip=2001:db8:1000::1
+	local vtep2_ip=2001:db8:2000::1
+	local plen=128
+	local grp=239.1.1.1
+	local valid_src=192.0.2.129
+	local invalid_src=192.0.2.145
+
+	echo
+	echo "Data path: (*, G) INCLUDE - IR - IPv4 overlay / IPv6 underlay"
+	echo "-------------------------------------------------------------"
+
+	starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
+		$valid_src $invalid_src "mausezahn"
+}
+
+starg_include_ir_ipv6_ipv6()
+{
+	local ns1=$ns1_v6
+	local ns2=$ns2_v6
+	local vtep1_ip=2001:db8:1000::1
+	local vtep2_ip=2001:db8:2000::1
+	local plen=128
+	local grp=ff0e::1
+	local valid_src=2001:db8:100::1
+	local invalid_src=2001:db8:200::1
+
+	echo
+	echo "Data path: (*, G) INCLUDE - IR - IPv6 overlay / IPv6 underlay"
+	echo "-------------------------------------------------------------"
+
+	starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
+		$valid_src $invalid_src "mausezahn -6"
+}
+
+starg_exclude_p2mp_common()
+{
+	local ns1=$1; shift
+	local ns2=$1; shift
+	local mcast_grp=$1; shift
+	local plen=$1; shift
+	local grp=$1; shift
+	local valid_src=$1; shift
+	local invalid_src=$1; shift
+	local mz=$1; shift
+
+	# Install a (*, G) EXCLUDE MDB entry with one source and one multicast
+	# group to which packets are sent. Make sure that the source in the
+	# source list is not forwarded and that a source not in the list is
+	# forwarded.
+
+	run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact"
+	run_cmd "ip -n $ns2 address replace $mcast_grp/$plen dev veth0 autojoin"
+
+	run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $mcast_grp action pass"
+
+	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $invalid_src dst $mcast_grp src_vni 10010 via veth0"
+
+	# Check that invalid source is not forwarded.
+	run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+	tc_check_packets "$ns2" "dev vx0 ingress" 101 0
+	log_test $? 0 "Block excluded source"
+
+	# Check that valid source is forwarded.
+	run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+	tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+	log_test $? 0 "Forward valid source"
+
+	# Remove the VTEP from the multicast group.
+	run_cmd "ip -n $ns2 address del $mcast_grp/$plen dev veth0"
+
+	# Check that valid source is not received anymore.
+	run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+	tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+	log_test $? 0 "Receive of valid source after removal from group"
+}
+
+starg_exclude_p2mp_ipv4_ipv4()
+{
+	local ns1=$ns1_v4
+	local ns2=$ns2_v4
+	local mcast_grp=238.1.1.1
+	local plen=32
+	local grp=239.1.1.1
+	local valid_src=192.0.2.129
+	local invalid_src=192.0.2.145
+
+	echo
+	echo "Data path: (*, G) EXCLUDE - P2MP - IPv4 overlay / IPv4 underlay"
+	echo "---------------------------------------------------------------"
+
+	starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+		$valid_src $invalid_src "mausezahn"
+}
+
+starg_exclude_p2mp_ipv6_ipv4()
+{
+	local ns1=$ns1_v4
+	local ns2=$ns2_v4
+	local mcast_grp=238.1.1.1
+	local plen=32
+	local grp=ff0e::1
+	local valid_src=2001:db8:100::1
+	local invalid_src=2001:db8:200::1
+
+	echo
+	echo "Data path: (*, G) EXCLUDE - P2MP - IPv6 overlay / IPv4 underlay"
+	echo "---------------------------------------------------------------"
+
+	starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+		$valid_src $invalid_src "mausezahn -6"
+}
+
+starg_exclude_p2mp_ipv4_ipv6()
+{
+	local ns1=$ns1_v6
+	local ns2=$ns2_v6
+	local mcast_grp=ff0e::2
+	local plen=128
+	local grp=239.1.1.1
+	local valid_src=192.0.2.129
+	local invalid_src=192.0.2.145
+
+	echo
+	echo "Data path: (*, G) EXCLUDE - P2MP - IPv4 overlay / IPv6 underlay"
+	echo "---------------------------------------------------------------"
+
+	starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+		$valid_src $invalid_src "mausezahn"
+}
+
+starg_exclude_p2mp_ipv6_ipv6()
+{
+	local ns1=$ns1_v6
+	local ns2=$ns2_v6
+	local mcast_grp=ff0e::2
+	local plen=128
+	local grp=ff0e::1
+	local valid_src=2001:db8:100::1
+	local invalid_src=2001:db8:200::1
+
+	echo
+	echo "Data path: (*, G) EXCLUDE - P2MP - IPv6 overlay / IPv6 underlay"
+	echo "---------------------------------------------------------------"
+
+	starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+		$valid_src $invalid_src "mausezahn -6"
+}
+
+starg_include_p2mp_common()
+{
+	local ns1=$1; shift
+	local ns2=$1; shift
+	local mcast_grp=$1; shift
+	local plen=$1; shift
+	local grp=$1; shift
+	local valid_src=$1; shift
+	local invalid_src=$1; shift
+	local mz=$1; shift
+
+	# Install a (*, G) INCLUDE MDB entry with one source and one multicast
+	# group to which packets are sent. Make sure that the source in the
+	# source list is forwarded and that a source not in the list is not
+	# forwarded.
+
+	run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact"
+	run_cmd "ip -n $ns2 address replace $mcast_grp/$plen dev veth0 autojoin"
+
+	run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $mcast_grp action pass"
+
+	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $valid_src dst $mcast_grp src_vni 10010 via veth0"
+
+	# Check that invalid source is not forwarded.
+	run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+	tc_check_packets "$ns2" "dev vx0 ingress" 101 0
+	log_test $? 0 "Block excluded source"
+
+	# Check that valid source is forwarded.
+	run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+	tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+	log_test $? 0 "Forward valid source"
+
+	# Remove the VTEP from the multicast group.
+	run_cmd "ip -n $ns2 address del $mcast_grp/$plen dev veth0"
+
+	# Check that valid source is not received anymore.
+	run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+	tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+	log_test $? 0 "Receive of valid source after removal from group"
+}
+
+starg_include_p2mp_ipv4_ipv4()
+{
+	local ns1=$ns1_v4
+	local ns2=$ns2_v4
+	local mcast_grp=238.1.1.1
+	local plen=32
+	local grp=239.1.1.1
+	local valid_src=192.0.2.129
+	local invalid_src=192.0.2.145
+
+	echo
+	echo "Data path: (*, G) INCLUDE - P2MP - IPv4 overlay / IPv4 underlay"
+	echo "---------------------------------------------------------------"
+
+	starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+		$valid_src $invalid_src "mausezahn"
+}
+
+starg_include_p2mp_ipv6_ipv4()
+{
+	local ns1=$ns1_v4
+	local ns2=$ns2_v4
+	local mcast_grp=238.1.1.1
+	local plen=32
+	local grp=ff0e::1
+	local valid_src=2001:db8:100::1
+	local invalid_src=2001:db8:200::1
+
+	echo
+	echo "Data path: (*, G) INCLUDE - P2MP - IPv6 overlay / IPv4 underlay"
+	echo "---------------------------------------------------------------"
+
+	starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+		$valid_src $invalid_src "mausezahn -6"
+}
+
+starg_include_p2mp_ipv4_ipv6()
+{
+	local ns1=$ns1_v6
+	local ns2=$ns2_v6
+	local mcast_grp=ff0e::2
+	local plen=128
+	local grp=239.1.1.1
+	local valid_src=192.0.2.129
+	local invalid_src=192.0.2.145
+
+	echo
+	echo "Data path: (*, G) INCLUDE - P2MP - IPv4 overlay / IPv6 underlay"
+	echo "---------------------------------------------------------------"
+
+	starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+		$valid_src $invalid_src "mausezahn"
+}
+
+starg_include_p2mp_ipv6_ipv6()
+{
+	local ns1=$ns1_v6
+	local ns2=$ns2_v6
+	local mcast_grp=ff0e::2
+	local plen=128
+	local grp=ff0e::1
+	local valid_src=2001:db8:100::1
+	local invalid_src=2001:db8:200::1
+
+	echo
+	echo "Data path: (*, G) INCLUDE - P2MP - IPv6 overlay / IPv6 underlay"
+	echo "---------------------------------------------------------------"
+
+	starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+		$valid_src $invalid_src "mausezahn -6"
+}
+
+egress_vni_translation_common()
+{
+	local ns1=$1; shift
+	local ns2=$1; shift
+	local mcast_grp=$1; shift
+	local plen=$1; shift
+	local proto=$1; shift
+	local grp=$1; shift
+	local src=$1; shift
+	local mz=$1; shift
+
+	# When P2MP tunnels are used with optimized inter-subnet multicast
+	# (OISM) [1], the ingress VTEP does not perform VNI translation and
+	# uses the VNI of the source broadcast domain (BD). If the egress VTEP
+	# is a member in the source BD, then no VNI translation is needed.
+	# Otherwise, the egress VTEP needs to translate the VNI to the
+	# supplementary broadcast domain (SBD) VNI, which is usually the L3VNI.
+	#
+	# In this test, remove the VTEP in the second namespace from VLAN 10
+	# (VNI 10010) and make sure that a packet sent from this VLAN on the
+	# first VTEP is received by the SVI corresponding to the L3VNI (14000 /
+	# VLAN 4000) on the second VTEP.
+	#
+	# The second VTEP will be able to decapsulate the packet with VNI 10010
+	# because this VNI is configured on its shared VXLAN device. Later,
+	# when ingressing the bridge, the VNI to VLAN lookup will fail because
+	# the VTEP is not a member in VLAN 10, which will cause the packet to
+	# be tagged with VLAN 4000 since it is configured as PVID.
+	#
+	# [1] https://datatracker.ietf.org/doc/html/draft-ietf-bess-evpn-irb-mcast
+
+	run_cmd "tc -n $ns2 qdisc replace dev br0.4000 clsact"
+	run_cmd "ip -n $ns2 address replace $mcast_grp/$plen dev veth0 autojoin"
+	run_cmd "tc -n $ns2 filter replace dev br0.4000 ingress pref 1 handle 101 proto $proto flower src_ip $src dst_ip $grp action pass"
+
+	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp src $src permanent dst $mcast_grp src_vni 10010 via veth0"
+
+	# Remove the second VTEP from VLAN 10.
+	run_cmd "bridge -n $ns2 vlan del vid 10 dev vx0"
+
+	# Make sure that packets sent from the first VTEP over VLAN 10 are
+	# received by the SVI corresponding to the L3VNI (14000 / VLAN 4000) on
+	# the second VTEP, since it is configured as PVID.
+	run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+	tc_check_packets "$ns2" "dev br0.4000 ingress" 101 1
+	log_test $? 0 "Egress VNI translation - PVID configured"
+
+	# Remove PVID flag from VLAN 4000 on the second VTEP and make sure
+	# packets are no longer received by the SVI interface.
+	run_cmd "bridge -n $ns2 vlan add vid 4000 dev vx0"
+	run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+	tc_check_packets "$ns2" "dev br0.4000 ingress" 101 1
+	log_test $? 0 "Egress VNI translation - no PVID configured"
+
+	# Reconfigure the PVID and make sure packets are received again.
+	run_cmd "bridge -n $ns2 vlan add vid 4000 dev vx0 pvid"
+	run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+	tc_check_packets "$ns2" "dev br0.4000 ingress" 101 2
+	log_test $? 0 "Egress VNI translation - PVID reconfigured"
+}
+
+egress_vni_translation_ipv4_ipv4()
+{
+	local ns1=$ns1_v4
+	local ns2=$ns2_v4
+	local mcast_grp=238.1.1.1
+	local plen=32
+	local proto="ipv4"
+	local grp=239.1.1.1
+	local src=192.0.2.129
+
+	echo
+	echo "Data path: Egress VNI translation - IPv4 overlay / IPv4 underlay"
+	echo "----------------------------------------------------------------"
+
+	egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \
+		$src "mausezahn"
+}
+
+egress_vni_translation_ipv6_ipv4()
+{
+	local ns1=$ns1_v4
+	local ns2=$ns2_v4
+	local mcast_grp=238.1.1.1
+	local plen=32
+	local proto="ipv6"
+	local grp=ff0e::1
+	local src=2001:db8:100::1
+
+	echo
+	echo "Data path: Egress VNI translation - IPv6 overlay / IPv4 underlay"
+	echo "----------------------------------------------------------------"
+
+	egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \
+		$src "mausezahn -6"
+}
+
+egress_vni_translation_ipv4_ipv6()
+{
+	local ns1=$ns1_v6
+	local ns2=$ns2_v6
+	local mcast_grp=ff0e::2
+	local plen=128
+	local proto="ipv4"
+	local grp=239.1.1.1
+	local src=192.0.2.129
+
+	echo
+	echo "Data path: Egress VNI translation - IPv4 overlay / IPv6 underlay"
+	echo "----------------------------------------------------------------"
+
+	egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \
+		$src "mausezahn"
+}
+
+egress_vni_translation_ipv6_ipv6()
+{
+	local ns1=$ns1_v6
+	local ns2=$ns2_v6
+	local mcast_grp=ff0e::2
+	local plen=128
+	local proto="ipv6"
+	local grp=ff0e::1
+	local src=2001:db8:100::1
+
+	echo
+	echo "Data path: Egress VNI translation - IPv6 overlay / IPv6 underlay"
+	echo "----------------------------------------------------------------"
+
+	egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \
+		$src "mausezahn -6"
+}
+
+all_zeros_mdb_common()
+{
+	local ns1=$1; shift
+	local ns2=$1; shift
+	local vtep1_ip=$1; shift
+	local vtep2_ip=$1; shift
+	local vtep3_ip=$1; shift
+	local vtep4_ip=$1; shift
+	local plen=$1; shift
+	local ipv4_grp=239.1.1.1
+	local ipv4_unreg_grp=239.2.2.2
+	local ipv4_ll_grp=224.0.0.100
+	local ipv4_src=192.0.2.129
+	local ipv6_grp=ff0e::1
+	local ipv6_unreg_grp=ff0e::2
+	local ipv6_ll_grp=ff02::1
+	local ipv6_src=2001:db8:100::1
+
+	# Install all-zeros (catchall) MDB entries for IPv4 and IPv6 traffic
+	# and make sure they only forward unregistered IP multicast traffic
+	# which is not link-local. Also make sure that each entry only forwards
+	# traffic from the matching address family.
+
+	# Associate two different VTEPs with one all-zeros MDB entry: Two with
+	# the IPv4 entry (0.0.0.0) and another two with the IPv6 one (::).
+	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp 0.0.0.0 permanent dst $vtep1_ip src_vni 10010"
+	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp 0.0.0.0 permanent dst $vtep2_ip src_vni 10010"
+	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp :: permanent dst $vtep3_ip src_vni 10010"
+	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp :: permanent dst $vtep4_ip src_vni 10010"
+
+	# Associate one VTEP from each set with a regular MDB entry: One with
+	# an IPv4 entry and another with an IPv6 one.
+	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $ipv4_grp permanent dst $vtep1_ip src_vni 10010"
+	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $ipv6_grp permanent dst $vtep3_ip src_vni 10010"
+
+	# Add filters to match on decapsulated traffic in the second namespace.
+	run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact"
+	run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass"
+	run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 102 proto all flower enc_dst_ip $vtep2_ip action pass"
+	run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 103 proto all flower enc_dst_ip $vtep3_ip action pass"
+	run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 104 proto all flower enc_dst_ip $vtep4_ip action pass"
+
+	# Configure the VTEP addresses in the second namespace to enable
+	# decapsulation.
+	run_cmd "ip -n $ns2 address replace $vtep1_ip/$plen dev lo"
+	run_cmd "ip -n $ns2 address replace $vtep2_ip/$plen dev lo"
+	run_cmd "ip -n $ns2 address replace $vtep3_ip/$plen dev lo"
+	run_cmd "ip -n $ns2 address replace $vtep4_ip/$plen dev lo"
+
+	# Send registered IPv4 multicast and make sure it only arrives to the
+	# first VTEP.
+	run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+	tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+	log_test $? 0 "Registered IPv4 multicast - first VTEP"
+	tc_check_packets "$ns2" "dev vx0 ingress" 102 0
+	log_test $? 0 "Registered IPv4 multicast - second VTEP"
+
+	# Send unregistered IPv4 multicast that is not link-local and make sure
+	# it arrives to the first and second VTEPs.
+	run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+	tc_check_packets "$ns2" "dev vx0 ingress" 101 2
+	log_test $? 0 "Unregistered IPv4 multicast - first VTEP"
+	tc_check_packets "$ns2" "dev vx0 ingress" 102 1
+	log_test $? 0 "Unregistered IPv4 multicast - second VTEP"
+
+	# Send IPv4 link-local multicast traffic and make sure it does not
+	# arrive to any VTEP.
+	run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+	tc_check_packets "$ns2" "dev vx0 ingress" 101 2
+	log_test $? 0 "Link-local IPv4 multicast - first VTEP"
+	tc_check_packets "$ns2" "dev vx0 ingress" 102 1
+	log_test $? 0 "Link-local IPv4 multicast - second VTEP"
+
+	# Send registered IPv4 multicast using a unicast MAC address and make
+	# sure it does not arrive to any VTEP.
+	run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b 00:11:22:33:44:55 -A $ipv4_src -B $ipv4_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+	tc_check_packets "$ns2" "dev vx0 ingress" 101 2
+	log_test $? 0 "Registered IPv4 multicast with a unicast MAC - first VTEP"
+	tc_check_packets "$ns2" "dev vx0 ingress" 102 1
+	log_test $? 0 "Registered IPv4 multicast with a unicast MAC - second VTEP"
+
+	# Send registered IPv4 multicast using a broadcast MAC address and make
+	# sure it does not arrive to any VTEP.
+	run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b bcast -A $ipv4_src -B $ipv4_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+	tc_check_packets "$ns2" "dev vx0 ingress" 101 2
+	log_test $? 0 "Registered IPv4 multicast with a broadcast MAC - first VTEP"
+	tc_check_packets "$ns2" "dev vx0 ingress" 102 1
+	log_test $? 0 "Registered IPv4 multicast with a broadcast MAC - second VTEP"
+
+	# Make sure IPv4 traffic did not reach the VTEPs associated with
+	# IPv6 entries.
+	tc_check_packets "$ns2" "dev vx0 ingress" 103 0
+	log_test $? 0 "IPv4 traffic - third VTEP"
+	tc_check_packets "$ns2" "dev vx0 ingress" 104 0
+	log_test $? 0 "IPv4 traffic - fourth VTEP"
+
+	# Reset IPv4 filters before testing IPv6 traffic.
+	run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass"
+	run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 102 proto all flower enc_dst_ip $vtep2_ip action pass"
+
+	# Send registered IPv6 multicast and make sure it only arrives to the
+	# third VTEP.
+	run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+	tc_check_packets "$ns2" "dev vx0 ingress" 103 1
+	log_test $? 0 "Registered IPv6 multicast - third VTEP"
+	tc_check_packets "$ns2" "dev vx0 ingress" 104 0
+	log_test $? 0 "Registered IPv6 multicast - fourth VTEP"
+
+	# Send unregistered IPv6 multicast that is not link-local and make sure
+	# it arrives to the third and fourth VTEPs.
+	run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+	tc_check_packets "$ns2" "dev vx0 ingress" 103 2
+	log_test $? 0 "Unregistered IPv6 multicast - third VTEP"
+	tc_check_packets "$ns2" "dev vx0 ingress" 104 1
+	log_test $? 0 "Unregistered IPv6 multicast - fourth VTEP"
+
+	# Send IPv6 link-local multicast traffic and make sure it does not
+	# arrive to any VTEP.
+	run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+	tc_check_packets "$ns2" "dev vx0 ingress" 103 2
+	log_test $? 0 "Link-local IPv6 multicast - third VTEP"
+	tc_check_packets "$ns2" "dev vx0 ingress" 104 1
+	log_test $? 0 "Link-local IPv6 multicast - fourth VTEP"
+
+	# Send registered IPv6 multicast using a unicast MAC address and make
+	# sure it does not arrive to any VTEP.
+	run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b 00:11:22:33:44:55 -A $ipv6_src -B $ipv6_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+	tc_check_packets "$ns2" "dev vx0 ingress" 103 2
+	log_test $? 0 "Registered IPv6 multicast with a unicast MAC - third VTEP"
+	tc_check_packets "$ns2" "dev vx0 ingress" 104 1
+	log_test $? 0 "Registered IPv6 multicast with a unicast MAC - fourth VTEP"
+
+	# Send registered IPv6 multicast using a broadcast MAC address and make
+	# sure it does not arrive to any VTEP.
+	run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b bcast -A $ipv6_src -B $ipv6_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+	tc_check_packets "$ns2" "dev vx0 ingress" 103 2
+	log_test $? 0 "Registered IPv6 multicast with a broadcast MAC - third VTEP"
+	tc_check_packets "$ns2" "dev vx0 ingress" 104 1
+	log_test $? 0 "Registered IPv6 multicast with a broadcast MAC - fourth VTEP"
+
+	# Make sure IPv6 traffic did not reach the VTEPs associated with
+	# IPv4 entries.
+	tc_check_packets "$ns2" "dev vx0 ingress" 101 0
+	log_test $? 0 "IPv6 traffic - first VTEP"
+	tc_check_packets "$ns2" "dev vx0 ingress" 102 0
+	log_test $? 0 "IPv6 traffic - second VTEP"
+}
+
+all_zeros_mdb_ipv4()
+{
+	local ns1=$ns1_v4
+	local ns2=$ns2_v4
+	local vtep1_ip=198.51.100.101
+	local vtep2_ip=198.51.100.102
+	local vtep3_ip=198.51.100.103
+	local vtep4_ip=198.51.100.104
+	local plen=32
+
+	echo
+	echo "Data path: All-zeros MDB entry - IPv4 underlay"
+	echo "----------------------------------------------"
+
+	all_zeros_mdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $vtep3_ip \
+		$vtep4_ip $plen
+}
+
+all_zeros_mdb_ipv6()
+{
+	local ns1=$ns1_v6
+	local ns2=$ns2_v6
+	local vtep1_ip=2001:db8:1000::1
+	local vtep2_ip=2001:db8:2000::1
+	local vtep3_ip=2001:db8:3000::1
+	local vtep4_ip=2001:db8:4000::1
+	local plen=128
+
+	echo
+	echo "Data path: All-zeros MDB entry - IPv6 underlay"
+	echo "----------------------------------------------"
+
+	all_zeros_mdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $vtep3_ip \
+		$vtep4_ip $plen
+}
+
+mdb_fdb_common()
+{
+	local ns1=$1; shift
+	local ns2=$1; shift
+	local vtep1_ip=$1; shift
+	local vtep2_ip=$1; shift
+	local plen=$1; shift
+	local proto=$1; shift
+	local grp=$1; shift
+	local src=$1; shift
+	local mz=$1; shift
+
+	# Install an MDB entry and an FDB entry and make sure that the FDB
+	# entry only forwards traffic that was not forwarded by the MDB.
+
+	# Associate the MDB entry with one VTEP and the FDB entry with another
+	# VTEP.
+	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10010"
+	run_cmd "bridge -n $ns1 fdb add 00:00:00:00:00:00 dev vx0 self static dst $vtep2_ip src_vni 10010"
+
+	# Add filters to match on decapsulated traffic in the second namespace.
+	run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact"
+	run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto $proto flower ip_proto udp dst_port 54321 enc_dst_ip $vtep1_ip action pass"
+	run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 102 proto $proto flower ip_proto udp dst_port 54321 enc_dst_ip $vtep2_ip action pass"
+
+	# Configure the VTEP addresses in the second namespace to enable
+	# decapsulation.
+	run_cmd "ip -n $ns2 address replace $vtep1_ip/$plen dev lo"
+	run_cmd "ip -n $ns2 address replace $vtep2_ip/$plen dev lo"
+
+	# Send IP multicast traffic and make sure it is forwarded by the MDB
+	# and only arrives to the first VTEP.
+	run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+	tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+	log_test $? 0 "IP multicast - first VTEP"
+	tc_check_packets "$ns2" "dev vx0 ingress" 102 0
+	log_test $? 0 "IP multicast - second VTEP"
+
+	# Send broadcast traffic and make sure it is forwarded by the FDB and
+	# only arrives to the second VTEP.
+	run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b bcast -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+	tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+	log_test $? 0 "Broadcast - first VTEP"
+	tc_check_packets "$ns2" "dev vx0 ingress" 102 1
+	log_test $? 0 "Broadcast - second VTEP"
+
+	# Remove the MDB entry and make sure that IP multicast is now forwarded
+	# by the FDB to the second VTEP.
+	run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10010"
+	run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+	tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+	log_test $? 0 "IP multicast after removal - first VTEP"
+	tc_check_packets "$ns2" "dev vx0 ingress" 102 2
+	log_test $? 0 "IP multicast after removal - second VTEP"
+}
+
+mdb_fdb_ipv4_ipv4()
+{
+	local ns1=$ns1_v4
+	local ns2=$ns2_v4
+	local vtep1_ip=198.51.100.100
+	local vtep2_ip=198.51.100.200
+	local plen=32
+	local proto="ipv4"
+	local grp=239.1.1.1
+	local src=192.0.2.129
+
+	echo
+	echo "Data path: MDB with FDB - IPv4 overlay / IPv4 underlay"
+	echo "------------------------------------------------------"
+
+	mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \
+		"mausezahn"
+}
+
+mdb_fdb_ipv6_ipv4()
+{
+	local ns1=$ns1_v4
+	local ns2=$ns2_v4
+	local vtep1_ip=198.51.100.100
+	local vtep2_ip=198.51.100.200
+	local plen=32
+	local proto="ipv6"
+	local grp=ff0e::1
+	local src=2001:db8:100::1
+
+	echo
+	echo "Data path: MDB with FDB - IPv6 overlay / IPv4 underlay"
+	echo "------------------------------------------------------"
+
+	mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \
+		"mausezahn -6"
+}
+
+mdb_fdb_ipv4_ipv6()
+{
+	local ns1=$ns1_v6
+	local ns2=$ns2_v6
+	local vtep1_ip=2001:db8:1000::1
+	local vtep2_ip=2001:db8:2000::1
+	local plen=128
+	local proto="ipv4"
+	local grp=239.1.1.1
+	local src=192.0.2.129
+
+	echo
+	echo "Data path: MDB with FDB - IPv4 overlay / IPv6 underlay"
+	echo "------------------------------------------------------"
+
+	mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \
+		"mausezahn"
+}
+
+mdb_fdb_ipv6_ipv6()
+{
+	local ns1=$ns1_v6
+	local ns2=$ns2_v6
+	local vtep1_ip=2001:db8:1000::1
+	local vtep2_ip=2001:db8:2000::1
+	local plen=128
+	local proto="ipv6"
+	local grp=ff0e::1
+	local src=2001:db8:100::1
+
+	echo
+	echo "Data path: MDB with FDB - IPv6 overlay / IPv6 underlay"
+	echo "------------------------------------------------------"
+
+	mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \
+		"mausezahn -6"
+}
+
+mdb_grp1_loop()
+{
+	local ns1=$1; shift
+	local vtep1_ip=$1; shift
+	local grp1=$1; shift
+
+	while true; do
+		bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp1 dst $vtep1_ip src_vni 10010
+		bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp1 permanent dst $vtep1_ip src_vni 10010
+	done >/dev/null 2>&1
+}
+
+mdb_grp2_loop()
+{
+	local ns1=$1; shift
+	local vtep1_ip=$1; shift
+	local vtep2_ip=$1; shift
+	local grp2=$1; shift
+
+	while true; do
+		bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp2 dst $vtep1_ip src_vni 10010
+		bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp2 permanent dst $vtep1_ip src_vni 10010
+		bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp2 permanent dst $vtep2_ip src_vni 10010
+	done >/dev/null 2>&1
+}
+
+mdb_torture_common()
+{
+	local ns1=$1; shift
+	local vtep1_ip=$1; shift
+	local vtep2_ip=$1; shift
+	local grp1=$1; shift
+	local grp2=$1; shift
+	local src=$1; shift
+	local mz=$1; shift
+	local pid1
+	local pid2
+	local pid3
+	local pid4
+
+	# Continuously send two streams that are forwarded by two different MDB
+	# entries. The first entry will be added and deleted in a loop. This
+	# allows us to test that the data path does not use freed MDB entry
+	# memory. The second entry will have two remotes, one that is added and
+	# deleted in a loop and another that is replaced in a loop. This allows
+	# us to test that the data path does not use freed remote entry memory.
+	# The test is considered successful if nothing crashed.
+
+	# Create the MDB entries that will be continuously deleted / replaced.
+	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp1 permanent dst $vtep1_ip src_vni 10010"
+	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp2 permanent dst $vtep1_ip src_vni 10010"
+	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp2 permanent dst $vtep2_ip src_vni 10010"
+
+	mdb_grp1_loop $ns1 $vtep1_ip $grp1 &
+	pid1=$!
+	mdb_grp2_loop $ns1 $vtep1_ip $vtep2_ip $grp2 &
+	pid2=$!
+	ip netns exec $ns1 $mz br0.10 -A $src -B $grp1 -t udp sp=12345,dp=54321 -p 100 -c 0 -q &
+	pid3=$!
+	ip netns exec $ns1 $mz br0.10 -A $src -B $grp2 -t udp sp=12345,dp=54321 -p 100 -c 0 -q &
+	pid4=$!
+
+	sleep 30
+	kill -9 $pid1 $pid2 $pid3 $pid4
+	wait $pid1 $pid2 $pid3 $pid4 2>/dev/null
+
+	log_test 0 0 "Torture test"
+}
+
+mdb_torture_ipv4_ipv4()
+{
+	local ns1=$ns1_v4
+	local vtep1_ip=198.51.100.100
+	local vtep2_ip=198.51.100.200
+	local grp1=239.1.1.1
+	local grp2=239.2.2.2
+	local src=192.0.2.129
+
+	echo
+	echo "Data path: MDB torture test - IPv4 overlay / IPv4 underlay"
+	echo "----------------------------------------------------------"
+
+	mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \
+		"mausezahn"
+}
+
+mdb_torture_ipv6_ipv4()
+{
+	local ns1=$ns1_v4
+	local vtep1_ip=198.51.100.100
+	local vtep2_ip=198.51.100.200
+	local grp1=ff0e::1
+	local grp2=ff0e::2
+	local src=2001:db8:100::1
+
+	echo
+	echo "Data path: MDB torture test - IPv6 overlay / IPv4 underlay"
+	echo "----------------------------------------------------------"
+
+	mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \
+		"mausezahn -6"
+}
+
+mdb_torture_ipv4_ipv6()
+{
+	local ns1=$ns1_v6
+	local vtep1_ip=2001:db8:1000::1
+	local vtep2_ip=2001:db8:2000::1
+	local grp1=239.1.1.1
+	local grp2=239.2.2.2
+	local src=192.0.2.129
+
+	echo
+	echo "Data path: MDB torture test - IPv4 overlay / IPv6 underlay"
+	echo "----------------------------------------------------------"
+
+	mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \
+		"mausezahn"
+}
+
+mdb_torture_ipv6_ipv6()
+{
+	local ns1=$ns1_v6
+	local vtep1_ip=2001:db8:1000::1
+	local vtep2_ip=2001:db8:2000::1
+	local grp1=ff0e::1
+	local grp2=ff0e::2
+	local src=2001:db8:100::1
+
+	echo
+	echo "Data path: MDB torture test - IPv6 overlay / IPv6 underlay"
+	echo "----------------------------------------------------------"
+
+	mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \
+		"mausezahn -6"
+}
+
+################################################################################
+# Usage
+
+usage()
+{
+	cat <<EOF
+usage: ${0##*/} OPTS
+
+        -t <test>   Test(s) to run (default: all)
+                    (options: $TESTS)
+        -c          Control path tests only
+        -d          Data path tests only
+        -p          Pause on fail
+        -P          Pause after each test before cleanup
+        -v          Verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
+# Main
+
+trap cleanup EXIT
+
+while getopts ":t:cdpPvh" opt; do
+	case $opt in
+		t) TESTS=$OPTARG;;
+		c) TESTS=${CONTROL_PATH_TESTS};;
+		d) TESTS=${DATA_PATH_TESTS};;
+		p) PAUSE_ON_FAIL=yes;;
+		P) PAUSE=yes;;
+		v) VERBOSE=$(($VERBOSE + 1));;
+		h) usage; exit 0;;
+		*) usage; exit 1;;
+	esac
+done
+
+# Make sure we don't pause twice.
+[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no
+
+if [ "$(id -u)" -ne 0 ];then
+	echo "SKIP: Need root privileges"
+	exit $ksft_skip;
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v bridge)" ]; then
+	echo "SKIP: Could not run test without bridge tool"
+	exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v mausezahn)" ]; then
+	echo "SKIP: Could not run test without mausezahn tool"
+	exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v jq)" ]; then
+	echo "SKIP: Could not run test without jq tool"
+	exit $ksft_skip
+fi
+
+bridge mdb help 2>&1 | grep -q "flush"
+if [ $? -ne 0 ]; then
+   echo "SKIP: iproute2 bridge too old, missing VXLAN MDB flush support"
+   exit $ksft_skip
+fi
+
+# Start clean.
+cleanup
+
+for t in $TESTS
+do
+	setup; $t; cleanup;
+done
+
+if [ "$TESTS" != "none" ]; then
+	printf "\nTests passed: %3d\n" ${nsuccess}
+	printf "Tests failed: %3d\n"   ${nfail}
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/test_vxlan_nolocalbypass.sh b/tools/testing/selftests/net/test_vxlan_nolocalbypass.sh
new file mode 100755
index 000000000000..b8805983b728
--- /dev/null
+++ b/tools/testing/selftests/net/test_vxlan_nolocalbypass.sh
@@ -0,0 +1,238 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is for checking the [no]localbypass VXLAN device option. The test
+# configures two VXLAN devices in the same network namespace and a tc filter on
+# the loopback device that drops encapsulated packets. The test sends packets
+# from the first VXLAN device and verifies that by default these packets are
+# received by the second VXLAN device. The test then enables the nolocalbypass
+# option and verifies that packets are no longer received by the second VXLAN
+# device.
+
+source lib.sh
+ret=0
+
+TESTS="
+	nolocalbypass
+"
+VERBOSE=0
+PAUSE_ON_FAIL=no
+PAUSE=no
+
+################################################################################
+# Utilities
+
+log_test()
+{
+	local rc=$1
+	local expected=$2
+	local msg="$3"
+
+	if [ ${rc} -eq ${expected} ]; then
+		printf "TEST: %-60s  [ OK ]\n" "${msg}"
+		nsuccess=$((nsuccess+1))
+	else
+		ret=1
+		nfail=$((nfail+1))
+		printf "TEST: %-60s  [FAIL]\n" "${msg}"
+		if [ "$VERBOSE" = "1" ]; then
+			echo "    rc=$rc, expected $expected"
+		fi
+
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+		echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	fi
+
+	if [ "${PAUSE}" = "yes" ]; then
+		echo
+		echo "hit enter to continue, 'q' to quit"
+		read a
+		[ "$a" = "q" ] && exit 1
+	fi
+
+	[ "$VERBOSE" = "1" ] && echo
+}
+
+run_cmd()
+{
+	local cmd="$1"
+	local out
+	local stderr="2>/dev/null"
+
+	if [ "$VERBOSE" = "1" ]; then
+		printf "COMMAND: $cmd\n"
+		stderr=
+	fi
+
+	out=$(eval $cmd $stderr)
+	rc=$?
+	if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+		echo "    $out"
+	fi
+
+	return $rc
+}
+
+tc_check_packets()
+{
+	local ns=$1; shift
+	local id=$1; shift
+	local handle=$1; shift
+	local count=$1; shift
+	local pkts
+
+	sleep 0.1
+	pkts=$(tc -n $ns -j -s filter show $id \
+		| jq ".[] | select(.options.handle == $handle) | \
+		.options.actions[0].stats.packets")
+	[[ $pkts == $count ]]
+}
+
+################################################################################
+# Setup
+
+setup()
+{
+	setup_ns ns1
+
+	ip -n $ns1 address add 192.0.2.1/32 dev lo
+	ip -n $ns1 address add 198.51.100.1/32 dev lo
+
+	ip -n $ns1 link add name vx0 up type vxlan id 100 local 198.51.100.1 \
+		dstport 4789 nolearning
+	ip -n $ns1 link add name vx1 up type vxlan id 100 dstport 4790
+}
+
+cleanup()
+{
+	cleanup_ns $ns1
+}
+
+################################################################################
+# Tests
+
+nolocalbypass()
+{
+	local smac=00:01:02:03:04:05
+	local dmac=00:0a:0b:0c:0d:0e
+
+	run_cmd "bridge -n $ns1 fdb add $dmac dev vx0 self static dst 192.0.2.1 port 4790"
+
+	run_cmd "tc -n $ns1 qdisc add dev vx1 clsact"
+	run_cmd "tc -n $ns1 filter add dev vx1 ingress pref 1 handle 101 proto all flower src_mac $smac dst_mac $dmac action pass"
+
+	run_cmd "tc -n $ns1 qdisc add dev lo clsact"
+	run_cmd "tc -n $ns1 filter add dev lo ingress pref 1 handle 101 proto ip flower ip_proto udp dst_port 4790 action drop"
+
+	run_cmd "ip -n $ns1 -d -j link show dev vx0 | jq -e '.[][\"linkinfo\"][\"info_data\"][\"localbypass\"] == true'"
+	log_test $? 0 "localbypass enabled"
+
+	run_cmd "ip netns exec $ns1 mausezahn vx0 -a $smac -b $dmac -c 1 -p 100 -q"
+
+	tc_check_packets "$ns1" "dev vx1 ingress" 101 1
+	log_test $? 0 "Packet received by local VXLAN device - localbypass"
+
+	run_cmd "ip -n $ns1 link set dev vx0 type vxlan nolocalbypass"
+
+	run_cmd "ip -n $ns1 -d -j link show dev vx0 | jq -e '.[][\"linkinfo\"][\"info_data\"][\"localbypass\"] == false'"
+	log_test $? 0 "localbypass disabled"
+
+	run_cmd "ip netns exec $ns1 mausezahn vx0 -a $smac -b $dmac -c 1 -p 100 -q"
+
+	tc_check_packets "$ns1" "dev vx1 ingress" 101 1
+	log_test $? 0 "Packet not received by local VXLAN device - nolocalbypass"
+
+	run_cmd "ip -n $ns1 link set dev vx0 type vxlan localbypass"
+
+	run_cmd "ip -n $ns1 -d -j link show dev vx0 | jq -e '.[][\"linkinfo\"][\"info_data\"][\"localbypass\"] == true'"
+	log_test $? 0 "localbypass enabled"
+
+	run_cmd "ip netns exec $ns1 mausezahn vx0 -a $smac -b $dmac -c 1 -p 100 -q"
+
+	tc_check_packets "$ns1" "dev vx1 ingress" 101 2
+	log_test $? 0 "Packet received by local VXLAN device - localbypass"
+}
+
+################################################################################
+# Usage
+
+usage()
+{
+	cat <<EOF
+usage: ${0##*/} OPTS
+
+        -t <test>   Test(s) to run (default: all)
+                    (options: $TESTS)
+        -p          Pause on fail
+        -P          Pause after each test before cleanup
+        -v          Verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
+# Main
+
+trap cleanup EXIT
+
+while getopts ":t:pPvh" opt; do
+	case $opt in
+		t) TESTS=$OPTARG ;;
+		p) PAUSE_ON_FAIL=yes;;
+		P) PAUSE=yes;;
+		v) VERBOSE=$(($VERBOSE + 1));;
+		h) usage; exit 0;;
+		*) usage; exit 1;;
+	esac
+done
+
+# Make sure we don't pause twice.
+[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no
+
+if [ "$(id -u)" -ne 0 ];then
+	echo "SKIP: Need root privileges"
+	exit $ksft_skip;
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v bridge)" ]; then
+	echo "SKIP: Could not run test without bridge tool"
+	exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v mausezahn)" ]; then
+	echo "SKIP: Could not run test without mausezahn tool"
+	exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v jq)" ]; then
+	echo "SKIP: Could not run test without jq tool"
+	exit $ksft_skip
+fi
+
+ip link help vxlan 2>&1 | grep -q "localbypass"
+if [ $? -ne 0 ]; then
+	echo "SKIP: iproute2 ip too old, missing VXLAN nolocalbypass support"
+	exit $ksft_skip
+fi
+
+cleanup
+
+for t in $TESTS
+do
+	setup; $t; cleanup;
+done
+
+if [ "$TESTS" != "none" ]; then
+	printf "\nTests passed: %3d\n" ${nsuccess}
+	printf "Tests failed: %3d\n"   ${nfail}
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/test_vxlan_under_vrf.sh b/tools/testing/selftests/net/test_vxlan_under_vrf.sh
index 09f9ed92cbe4..ae8fbe3f0779 100755
--- a/tools/testing/selftests/net/test_vxlan_under_vrf.sh
+++ b/tools/testing/selftests/net/test_vxlan_under_vrf.sh
@@ -43,15 +43,14 @@
 # This tests both the connectivity between vm-1 and vm-2, and that the underlay
 # can be moved in and out of the vrf by unsetting and setting veth0's master.
 
+source lib.sh
 set -e
 
 cleanup() {
     ip link del veth-hv-1 2>/dev/null || true
     ip link del veth-tap 2>/dev/null || true
 
-    for ns in hv-1 hv-2 vm-1 vm-2; do
-        ip netns del $ns || true
-    done
+    cleanup_ns $hv_1 $hv_2 $vm_1 $vm_2
 }
 
 # Clean start
@@ -60,70 +59,75 @@ cleanup &> /dev/null
 [[ $1 == "clean" ]] && exit 0
 
 trap cleanup EXIT
+setup_ns hv_1 hv_2 vm_1 vm_2
+hv[1]=$hv_1
+hv[2]=$hv_2
+vm[1]=$vm_1
+vm[2]=$vm_2
 
 # Setup "Hypervisors" simulated with netns
 ip link add veth-hv-1 type veth peer name veth-hv-2
 setup-hv-networking() {
-    hv=$1
+    id=$1
 
-    ip netns add hv-$hv
-    ip link set veth-hv-$hv netns hv-$hv
-    ip -netns hv-$hv link set veth-hv-$hv name veth0
+    ip link set veth-hv-$id netns ${hv[$id]}
+    ip -netns ${hv[$id]} link set veth-hv-$id name veth0
 
-    ip -netns hv-$hv link add vrf-underlay type vrf table 1
-    ip -netns hv-$hv link set vrf-underlay up
-    ip -netns hv-$hv addr add 172.16.0.$hv/24 dev veth0
-    ip -netns hv-$hv link set veth0 up
+    ip -netns ${hv[$id]} link add vrf-underlay type vrf table 1
+    ip -netns ${hv[$id]} link set vrf-underlay up
+    ip -netns ${hv[$id]} addr add 172.16.0.$id/24 dev veth0
+    ip -netns ${hv[$id]} link set veth0 up
 
-    ip -netns hv-$hv link add br0 type bridge
-    ip -netns hv-$hv link set br0 up
+    ip -netns ${hv[$id]} link add br0 type bridge
+    ip -netns ${hv[$id]} link set br0 up
 
-    ip -netns hv-$hv link add vxlan0 type vxlan id 10 local 172.16.0.$hv dev veth0 dstport 4789
-    ip -netns hv-$hv link set vxlan0 master br0
-    ip -netns hv-$hv link set vxlan0 up
+    ip -netns ${hv[$id]} link add vxlan0 type vxlan id 10 local 172.16.0.$id dev veth0 dstport 4789
+    ip -netns ${hv[$id]} link set vxlan0 master br0
+    ip -netns ${hv[$id]} link set vxlan0 up
 }
 setup-hv-networking 1
 setup-hv-networking 2
 
 # Check connectivity between HVs by pinging hv-2 from hv-1
 echo -n "Checking HV connectivity                                           "
-ip netns exec hv-1 ping -c 1 -W 1 172.16.0.2 &> /dev/null || (echo "[FAIL]"; false)
+ip netns exec $hv_1 ping -c 1 -W 1 172.16.0.2 &> /dev/null || (echo "[FAIL]"; false)
 echo "[ OK ]"
 
 # Setups a "VM" simulated by a netns an a veth pair
 setup-vm() {
     id=$1
 
-    ip netns add vm-$id
     ip link add veth-tap type veth peer name veth-hv
 
-    ip link set veth-tap netns hv-$id
-    ip -netns hv-$id link set veth-tap master br0
-    ip -netns hv-$id link set veth-tap up
+    ip link set veth-tap netns ${hv[$id]}
+    ip -netns ${hv[$id]} link set veth-tap master br0
+    ip -netns ${hv[$id]} link set veth-tap up
+
+    ip link set veth-hv address 02:1d:8d:dd:0c:6$id
 
-    ip link set veth-hv netns vm-$id
-    ip -netns vm-$id addr add 10.0.0.$id/24 dev veth-hv
-    ip -netns vm-$id link set veth-hv up
+    ip link set veth-hv netns ${vm[$id]}
+    ip -netns ${vm[$id]} addr add 10.0.0.$id/24 dev veth-hv
+    ip -netns ${vm[$id]} link set veth-hv up
 }
 setup-vm 1
 setup-vm 2
 
 # Setup VTEP routes to make ARP work
-bridge -netns hv-1 fdb add 00:00:00:00:00:00 dev vxlan0 dst 172.16.0.2 self permanent
-bridge -netns hv-2 fdb add 00:00:00:00:00:00 dev vxlan0 dst 172.16.0.1 self permanent
+bridge -netns $hv_1 fdb add 00:00:00:00:00:00 dev vxlan0 dst 172.16.0.2 self permanent
+bridge -netns $hv_2 fdb add 00:00:00:00:00:00 dev vxlan0 dst 172.16.0.1 self permanent
 
 echo -n "Check VM connectivity through VXLAN (underlay in the default VRF)  "
-ip netns exec vm-1 ping -c 1 -W 1 10.0.0.2 &> /dev/null || (echo "[FAIL]"; false)
+ip netns exec $vm_1 ping -c 1 -W 1 10.0.0.2 &> /dev/null || (echo "[FAIL]"; false)
 echo "[ OK ]"
 
 # Move the underlay to a non-default VRF
-ip -netns hv-1 link set veth0 vrf vrf-underlay
-ip -netns hv-1 link set veth0 down
-ip -netns hv-1 link set veth0 up
-ip -netns hv-2 link set veth0 vrf vrf-underlay
-ip -netns hv-2 link set veth0 down
-ip -netns hv-2 link set veth0 up
+ip -netns $hv_1 link set veth0 vrf vrf-underlay
+ip -netns $hv_1 link set vxlan0 down
+ip -netns $hv_1 link set vxlan0 up
+ip -netns $hv_2 link set veth0 vrf vrf-underlay
+ip -netns $hv_2 link set vxlan0 down
+ip -netns $hv_2 link set vxlan0 up
 
 echo -n "Check VM connectivity through VXLAN (underlay in a VRF)            "
-ip netns exec vm-1 ping -c 1 -W 1 10.0.0.2 &> /dev/null || (echo "[FAIL]"; false)
+ip netns exec $vm_1 ping -c 1 -W 1 10.0.0.2 &> /dev/null || (echo "[FAIL]"; false)
 echo "[ OK ]"
diff --git a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh
new file mode 100755
index 000000000000..6127a78ee988
--- /dev/null
+++ b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh
@@ -0,0 +1,607 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is for checking the VXLAN vni filtering api and
+# datapath.
+# It simulates two hypervisors running two VMs each using four network
+# six namespaces: two for the HVs, four for the VMs. Each VM is
+# connected to a separate bridge. The VM's use overlapping vlans and
+# hence the separate bridge domain. Each vxlan device is a collect
+# metadata device with vni filtering and hence has the ability to
+# terminate configured vni's only.
+
+#  +--------------------------------+     +------------------------------------+
+#  |  vm-11 netns                   |     |  vm-21 netns                       |
+#  |                                |     |                                    |
+#  |+------------+  +-------------+ |     |+-------------+ +----------------+  |
+#  ||veth-11.10  |  |veth-11.20   | |     ||veth-21.10   | | veth-21.20     |  |
+#  ||10.0.10.11/24  |10.0.20.11/24| |     ||10.0.10.21/24| | 10.0.20.21/24  |  |
+#  |+------|-----+  +|------------+ |     |+-----------|-+ +---|------------+  |
+#  |       |         |              |     |            |       |               |
+#  |       |         |              |     |         +------------+             |
+#  |      +------------+            |     |         | veth-21    |             |
+#  |      | veth-11    |            |     |         |            |             |
+#  |      |            |            |     |         +-----|------+             |
+#  |      +-----|------+            |     |               |                    |
+#  |            |                   |     |               |                    |
+#  +------------|-------------------+     +---------------|--------------------+
+#  +------------|-----------------------------------------|-------------------+
+#  |      +-----|------+                            +-----|------+            |
+#  |      |vethhv-11   |                            |vethhv-21   |            |
+#  |      +----|-------+                            +-----|------+            |
+#  |       +---|---+                                  +---|--+                |
+#  |       |  br1  |                                  | br2  |                |
+#  |       +---|---+                                  +---|--+                |
+#  |       +---|----+                                 +---|--+                |
+#  |       |  vxlan1|                                 |vxlan2|                |
+#  |       +--|-----+                                 +--|---+                |
+#  |          |                                          |                    |
+#  |          |         +---------------------+          |                    |
+#  |          |         |veth0                |          |                    |
+#  |          +---------|172.16.0.1/24        -----------+                    |
+#  |                    |2002:fee1::1/64      |                               |
+#  | hv-1 netns         +--------|------------+                               |
+#  +-----------------------------|--------------------------------------------+
+#                                |
+#  +-----------------------------|--------------------------------------------+
+#  | hv-2 netns         +--------|-------------+                              |
+#  |                    | veth0                |                              |
+#  |             +------| 172.16.0.2/24        |---+                          |
+#  |             |      | 2002:fee1::2/64      |   |                          |
+#  |             |      |                      |   |                          |
+#  |             |      +----------------------+   |         -                |
+#  |             |                                 |                          |
+#  |           +-|-------+                +--------|-+                        |
+#  |           | vxlan1  |                |  vxlan2  |                        |
+#  |           +----|----+                +---|------+                        |
+#  |             +--|--+                    +-|---+                           |
+#  |             | br1 |                    | br2 |                           |
+#  |             +--|--+                    +--|--+                           |
+#  |          +-----|-------+             +----|-------+                      |
+#  |          | vethhv-12   |             |vethhv-22   |                      |
+#  |          +------|------+             +-------|----+                      |
+#  +-----------------|----------------------------|---------------------------+
+#                    |                            |
+#  +-----------------|-----------------+ +--------|---------------------------+
+#  |         +-------|---+             | |     +--|---------+                 |
+#  |         | veth-12   |             | |     |veth-22     |                 |
+#  |         +-|--------|+             | |     +--|--------|+                 |
+#  |           |        |              | |        |        |                  |
+#  |+----------|--+ +---|-----------+  | |+-------|-----+ +|---------------+  |
+#  ||veth-12.10   | |veth-12.20     |  | ||veth-22.10   | |veth-22.20      |  |
+#  ||10.0.10.12/24| |10.0.20.12/24  |  | ||10.0.10.22/24| |10.0.20.22/24   |  |
+#  |+-------------+ +---------------+  | |+-------------+ +----------------+  |
+#  |                                   | |                                    |
+#  |                                   | |                                    |
+#  | vm-12 netns                       | |vm-22 netns                         |
+#  +-----------------------------------+ +------------------------------------+
+#
+#
+# This test tests the new vxlan vnifiltering api
+source lib.sh
+ret=0
+
+# all tests in this script. Can be overridden with -t option
+TESTS="
+	vxlan_vnifilter_api
+	vxlan_vnifilter_datapath
+	vxlan_vnifilter_datapath_pervni
+	vxlan_vnifilter_datapath_mgroup
+	vxlan_vnifilter_datapath_mgroup_pervni
+	vxlan_vnifilter_metadata_and_traditional_mix
+"
+VERBOSE=0
+PAUSE_ON_FAIL=no
+PAUSE=no
+
+which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
+
+log_test()
+{
+	local rc=$1
+	local expected=$2
+	local msg="$3"
+
+	if [ ${rc} -eq ${expected} ]; then
+		printf "    TEST: %-60s  [ OK ]\n" "${msg}"
+		nsuccess=$((nsuccess+1))
+	else
+		ret=1
+		nfail=$((nfail+1))
+		printf "    TEST: %-60s  [FAIL]\n" "${msg}"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+		echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	fi
+
+	if [ "${PAUSE}" = "yes" ]; then
+		echo
+		echo "hit enter to continue, 'q' to quit"
+		read a
+		[ "$a" = "q" ] && exit 1
+	fi
+}
+
+run_cmd()
+{
+	local cmd="$1"
+	local out
+	local stderr="2>/dev/null"
+
+	if [ "$VERBOSE" = "1" ]; then
+		printf "COMMAND: $cmd\n"
+		stderr=
+	fi
+
+	out=$(eval $cmd $stderr)
+	rc=$?
+	if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+		echo "    $out"
+	fi
+
+	return $rc
+}
+
+check_hv_connectivity() {
+	ip netns exec $hv_1 ping -c 1 -W 1 $1 &>/dev/null
+	sleep 1
+	ip netns exec $hv_1 ping -c 1 -W 1 $2 &>/dev/null
+
+	return $?
+}
+
+check_vm_connectivity() {
+	run_cmd "ip netns exec $vm_11 ping -c 1 -W 1 10.0.10.12"
+	log_test $? 0 "VM connectivity over $1 (ipv4 default rdst)"
+
+	run_cmd "ip netns exec $vm_21 ping -c 1 -W 1 10.0.10.22"
+	log_test $? 0 "VM connectivity over $1 (ipv6 default rdst)"
+}
+
+cleanup() {
+	ip link del veth-hv-1 2>/dev/null || true
+	ip link del vethhv-11 vethhv-12 vethhv-21 vethhv-22 2>/dev/null || true
+
+	cleanup_ns $hv_1 $hv_2 $vm_11 $vm_21 $vm_12 $vm_22 $vm_31 $vm_32
+}
+
+trap cleanup EXIT
+
+setup-hv-networking() {
+	id=$1
+	local1=$2
+	mask1=$3
+	local2=$4
+	mask2=$5
+
+	ip link set veth-hv-$id netns ${hv[$id]}
+	ip -netns ${hv[$id]} link set veth-hv-$id name veth0
+	ip -netns ${hv[$id]} addr add $local1/$mask1 dev veth0
+	ip -netns ${hv[$id]} addr add $local2/$mask2 dev veth0
+	ip -netns ${hv[$id]} link set veth0 up
+}
+
+# Setups a "VM" simulated by a netns an a veth pair
+# example: setup-vm <hvid> <vmid> <brid> <VATTRS> <mcast_for_bum>
+# VATTRS = comma separated "<vlan>-<v[46]>-<localip>-<remoteip>-<VTYPE>-<vxlandstport>"
+# VTYPE = vxlan device type. "default = traditional device, metadata = metadata device
+#         vnifilter = vnifiltering device,
+#         vnifilterg = vnifiltering device with per vni group/remote"
+# example:
+#     setup-vm 1 11 1 \
+#         10-v4-172.16.0.1-239.1.1.100-vnifilterg,20-v4-172.16.0.1-239.1.1.100-vnifilterg 1
+#
+setup-vm() {
+	hvid=$1
+	vmid=$2
+	brid=$3
+	vattrs=$4
+	mcast=$5
+	lastvxlandev=""
+
+	# create bridge
+	ip -netns ${hv[$hvid]} link add br$brid type bridge vlan_filtering 1 vlan_default_pvid 0 \
+		mcast_snooping 0
+	ip -netns ${hv[$hvid]} link set br$brid up
+
+	# create vm namespace and interfaces and connect to hypervisor
+	# namespace
+	hvvethif="vethhv-$vmid"
+	vmvethif="veth-$vmid"
+	ip link add $hvvethif type veth peer name $vmvethif
+	ip link set $hvvethif netns ${hv[$hvid]}
+	ip link set $vmvethif netns ${vm[$vmid]}
+	ip -netns ${hv[$hvid]} link set $hvvethif up
+	ip -netns ${vm[$vmid]} link set $vmvethif up
+	ip -netns ${hv[$hvid]} link set $hvvethif master br$brid
+
+	# configure VM vlan/vni filtering on hypervisor
+	for vmap in $(echo $vattrs | cut -d "," -f1- --output-delimiter=' ')
+	do
+	local vid=$(echo $vmap | awk -F'-' '{print ($1)}')
+	local family=$(echo $vmap | awk -F'-' '{print ($2)}')
+	local localip=$(echo $vmap | awk -F'-' '{print ($3)}')
+	local group=$(echo $vmap | awk -F'-' '{print ($4)}')
+	local vtype=$(echo $vmap | awk -F'-' '{print ($5)}')
+	local port=$(echo $vmap | awk -F'-' '{print ($6)}')
+
+	ip -netns ${vm[$vmid]} link add name $vmvethif.$vid link $vmvethif type vlan id $vid
+	ip -netns ${vm[$vmid]} addr add 10.0.$vid.$vmid/24 dev $vmvethif.$vid
+	ip -netns ${vm[$vmid]} link set $vmvethif.$vid up
+
+	tid=$vid
+	vxlandev="vxlan$brid"
+	vxlandevflags=""
+
+	if [[ -n $vtype && $vtype == "metadata" ]]; then
+	   vxlandevflags="$vxlandevflags external"
+	elif [[ -n $vtype && $vtype == "vnifilter" || $vtype == "vnifilterg" ]]; then
+	   vxlandevflags="$vxlandevflags external vnifilter"
+	   tid=$((vid+brid))
+	else
+	   vxlandevflags="$vxlandevflags id $tid"
+	   vxlandev="vxlan$tid"
+	fi
+
+	if [[ -n $vtype && $vtype != "vnifilterg" ]]; then
+	   if [[ -n "$group" && "$group" != "null" ]]; then
+	      if [ $mcast -eq 1 ]; then
+		 vxlandevflags="$vxlandevflags group $group"
+	      else
+		 vxlandevflags="$vxlandevflags remote $group"
+	      fi
+	   fi
+	fi
+
+	if [[ -n "$port" && "$port" != "default" ]]; then
+	      vxlandevflags="$vxlandevflags dstport $port"
+	fi
+
+	# create vxlan device
+	if [ "$vxlandev" != "$lastvxlandev" ]; then
+	     ip -netns ${hv[$hvid]} link add $vxlandev type vxlan local $localip $vxlandevflags dev veth0 2>/dev/null
+	     ip -netns ${hv[$hvid]} link set $vxlandev master br$brid
+	     ip -netns ${hv[$hvid]} link set $vxlandev up
+	     lastvxlandev=$vxlandev
+	fi
+
+	# add vlan
+	bridge -netns ${hv[$hvid]} vlan add vid $vid dev $hvvethif
+	bridge -netns ${hv[$hvid]} vlan add vid $vid pvid dev $vxlandev
+
+	# Add bridge vni filter for tx
+	if [[ -n $vtype && $vtype == "metadata" || $vtype == "vnifilter" || $vtype == "vnifilterg" ]]; then
+	   bridge -netns ${hv[$hvid]} link set dev $vxlandev vlan_tunnel on
+	   bridge -netns ${hv[$hvid]} vlan add dev $vxlandev vid $vid tunnel_info id $tid
+	fi
+
+	if [[ -n $vtype && $vtype == "metadata" ]]; then
+	   bridge -netns ${hv[$hvid]} fdb add 00:00:00:00:00:00 dev $vxlandev \
+								src_vni $tid vni $tid dst $group self
+	elif [[ -n $vtype && $vtype == "vnifilter" ]]; then
+	   # Add per vni rx filter with 'bridge vni' api
+	   bridge -netns ${hv[$hvid]} vni add dev $vxlandev vni $tid
+	elif [[ -n $vtype && $vtype == "vnifilterg" ]]; then
+	   # Add per vni group config with 'bridge vni' api
+	   if [ -n "$group" ]; then
+		if [ $mcast -eq 1 ]; then
+			bridge -netns ${hv[$hvid]} vni add dev $vxlandev vni $tid group $group
+		else
+			bridge -netns ${hv[$hvid]} vni add dev $vxlandev vni $tid remote $group
+		fi
+	   fi
+	fi
+	done
+}
+
+setup_vnifilter_api()
+{
+	ip link add veth-host type veth peer name veth-testns
+	setup_ns testns
+	ip link set veth-testns netns $testns
+}
+
+cleanup_vnifilter_api()
+{
+	ip link del veth-host 2>/dev/null || true
+	ip netns del $testns 2>/dev/null || true
+}
+
+# tests vxlan filtering api
+vxlan_vnifilter_api()
+{
+	hv1addr1="172.16.0.1"
+	hv2addr1="172.16.0.2"
+	hv1addr2="2002:fee1::1"
+	hv2addr2="2002:fee1::2"
+	localip="172.16.0.1"
+	group="239.1.1.101"
+
+	cleanup_vnifilter_api &>/dev/null
+	setup_vnifilter_api
+
+	# Duplicate vni test
+	# create non-vnifiltering traditional vni device
+	run_cmd "ip -netns $testns link add vxlan100 type vxlan id 100 local $localip dev veth-testns dstport 4789"
+	log_test $? 0 "Create traditional vxlan device"
+
+	# create vni filtering device
+	run_cmd "ip -netns $testns link add vxlan-ext1 type vxlan vnifilter local $localip dev veth-testns dstport 4789"
+	log_test $? 1 "Cannot create vnifilter device without external flag"
+
+	run_cmd "ip -netns $testns link add vxlan-ext1 type vxlan external vnifilter local $localip dev veth-testns dstport 4789"
+	log_test $? 0 "Creating external vxlan device with vnifilter flag"
+
+	run_cmd "bridge -netns $testns vni add dev vxlan-ext1 vni 100"
+	log_test $? 0 "Cannot set in-use vni id on vnifiltering device"
+
+	run_cmd "bridge -netns $testns vni add dev vxlan-ext1 vni 200"
+	log_test $? 0 "Set new vni id on vnifiltering device"
+
+	run_cmd "ip -netns $testns link add vxlan-ext2 type vxlan external vnifilter local $localip dev veth-testns dstport 4789"
+	log_test $? 0 "Create second external vxlan device with vnifilter flag"
+
+	run_cmd "bridge -netns $testns vni add dev vxlan-ext2 vni 200"
+	log_test $? 255 "Cannot set in-use vni id on vnifiltering device"
+
+	run_cmd "bridge -netns $testns vni add dev vxlan-ext2 vni 300"
+	log_test $? 0 "Set new vni id on vnifiltering device"
+
+	# check in bridge vni show
+	run_cmd "bridge -netns $testns vni add dev vxlan-ext2 vni 300"
+	log_test $? 0 "Update vni id on vnifiltering device"
+
+	run_cmd "bridge -netns $testns vni add dev vxlan-ext2 vni 400"
+	log_test $? 0 "Add new vni id on vnifiltering device"
+
+	# add multicast group per vni
+	run_cmd "bridge -netns $testns vni add dev vxlan-ext1 vni 200 group $group"
+	log_test $? 0 "Set multicast group on existing vni"
+
+	# add multicast group per vni
+	run_cmd "bridge -netns $testns vni add dev vxlan-ext2 vni 300 group $group"
+	log_test $? 0 "Set multicast group on existing vni"
+
+	# set vnifilter on an existing external vxlan device
+	run_cmd "ip -netns $testns link set dev vxlan-ext1 type vxlan external vnifilter"
+	log_test $? 2 "Cannot set vnifilter flag on a device"
+
+	# change vxlan vnifilter flag
+	run_cmd "ip -netns $testns link set dev vxlan-ext1 type vxlan external novnifilter"
+	log_test $? 2 "Cannot unset vnifilter flag on a device"
+}
+
+# Sanity test vnifilter datapath
+# vnifilter vnis inherit BUM group from
+# vxlan device
+vxlan_vnifilter_datapath()
+{
+	hv1addr1="172.16.0.1"
+	hv2addr1="172.16.0.2"
+	hv1addr2="2002:fee1::1"
+	hv2addr2="2002:fee1::2"
+
+	setup_ns hv_1 hv_2
+	hv[1]=$hv_1
+	hv[2]=$hv_2
+	ip link add veth-hv-1 type veth peer name veth-hv-2
+	setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64 $hv2addr1 $hv2addr2
+	setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64 $hv1addr1 $hv1addr2
+
+        check_hv_connectivity hv2addr1 hv2addr2
+
+	setup_ns vm_11 vm_21 vm_12 vm_22
+	vm[11]=$vm_11
+	vm[21]=$vm_21
+	vm[12]=$vm_12
+	vm[22]=$vm_22
+	setup-vm 1 11 1 10-v4-$hv1addr1-$hv2addr1-vnifilter,20-v4-$hv1addr1-$hv2addr1-vnifilter 0
+	setup-vm 1 21 2 10-v6-$hv1addr2-$hv2addr2-vnifilter,20-v6-$hv1addr2-$hv2addr2-vnifilter 0
+
+	setup-vm 2 12 1 10-v4-$hv2addr1-$hv1addr1-vnifilter,20-v4-$hv2addr1-$hv1addr1-vnifilter 0
+	setup-vm 2 22 2 10-v6-$hv2addr2-$hv1addr2-vnifilter,20-v6-$hv2addr2-$hv1addr2-vnifilter 0
+
+        check_vm_connectivity "vnifiltering vxlan"
+}
+
+# Sanity test vnifilter datapath
+# with vnifilter per vni configured BUM
+# group/remote
+vxlan_vnifilter_datapath_pervni()
+{
+	hv1addr1="172.16.0.1"
+	hv2addr1="172.16.0.2"
+	hv1addr2="2002:fee1::1"
+	hv2addr2="2002:fee1::2"
+
+	setup_ns hv_1 hv_2
+	hv[1]=$hv_1
+	hv[2]=$hv_2
+	ip link add veth-hv-1 type veth peer name veth-hv-2
+	setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64
+	setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64
+
+        check_hv_connectivity hv2addr1 hv2addr2
+
+	setup_ns vm_11 vm_21 vm_12 vm_22
+	vm[11]=$vm_11
+	vm[21]=$vm_21
+	vm[12]=$vm_12
+	vm[22]=$vm_22
+	setup-vm 1 11 1 10-v4-$hv1addr1-$hv2addr1-vnifilterg,20-v4-$hv1addr1-$hv2addr1-vnifilterg 0
+	setup-vm 1 21 2 10-v6-$hv1addr2-$hv2addr2-vnifilterg,20-v6-$hv1addr2-$hv2addr2-vnifilterg 0
+
+	setup-vm 2 12 1 10-v4-$hv2addr1-$hv1addr1-vnifilterg,20-v4-$hv2addr1-$hv1addr1-vnifilterg 0
+	setup-vm 2 22 2 10-v6-$hv2addr2-$hv1addr2-vnifilterg,20-v6-$hv2addr2-$hv1addr2-vnifilterg 0
+
+        check_vm_connectivity "vnifiltering vxlan pervni remote"
+}
+
+
+vxlan_vnifilter_datapath_mgroup()
+{
+	hv1addr1="172.16.0.1"
+	hv2addr1="172.16.0.2"
+	hv1addr2="2002:fee1::1"
+	hv2addr2="2002:fee1::2"
+        group="239.1.1.100"
+        group6="ff07::1"
+
+	setup_ns hv_1 hv_2
+	hv[1]=$hv_1
+	hv[2]=$hv_2
+	ip link add veth-hv-1 type veth peer name veth-hv-2
+	setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64
+	setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64
+
+        check_hv_connectivity hv2addr1 hv2addr2
+
+	setup_ns vm_11 vm_21 vm_12 vm_22
+	vm[11]=$vm_11
+	vm[21]=$vm_21
+	vm[12]=$vm_12
+	vm[22]=$vm_22
+	setup-vm 1 11 1 10-v4-$hv1addr1-$group-vnifilter,20-v4-$hv1addr1-$group-vnifilter 1
+	setup-vm 1 21 2 "10-v6-$hv1addr2-$group6-vnifilter,20-v6-$hv1addr2-$group6-vnifilter" 1
+
+        setup-vm 2 12 1 10-v4-$hv2addr1-$group-vnifilter,20-v4-$hv2addr1-$group-vnifilter 1
+        setup-vm 2 22 2 10-v6-$hv2addr2-$group6-vnifilter,20-v6-$hv2addr2-$group6-vnifilter 1
+
+        check_vm_connectivity "vnifiltering vxlan mgroup"
+}
+
+vxlan_vnifilter_datapath_mgroup_pervni()
+{
+	hv1addr1="172.16.0.1"
+	hv2addr1="172.16.0.2"
+	hv1addr2="2002:fee1::1"
+	hv2addr2="2002:fee1::2"
+        group="239.1.1.100"
+        group6="ff07::1"
+
+	setup_ns hv_1 hv_2
+	hv[1]=$hv_1
+	hv[2]=$hv_2
+	ip link add veth-hv-1 type veth peer name veth-hv-2
+	setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64
+	setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64
+
+        check_hv_connectivity hv2addr1 hv2addr2
+
+	setup_ns vm_11 vm_21 vm_12 vm_22
+	vm[11]=$vm_11
+	vm[21]=$vm_21
+	vm[12]=$vm_12
+	vm[22]=$vm_22
+	setup-vm 1 11 1 10-v4-$hv1addr1-$group-vnifilterg,20-v4-$hv1addr1-$group-vnifilterg 1
+	setup-vm 1 21 2 10-v6-$hv1addr2-$group6-vnifilterg,20-v6-$hv1addr2-$group6-vnifilterg 1
+
+        setup-vm 2 12 1 10-v4-$hv2addr1-$group-vnifilterg,20-v4-$hv2addr1-$group-vnifilterg 1
+        setup-vm 2 22 2 10-v6-$hv2addr2-$group6-vnifilterg,20-v6-$hv2addr2-$group6-vnifilterg 1
+
+        check_vm_connectivity "vnifiltering vxlan pervni mgroup"
+}
+
+vxlan_vnifilter_metadata_and_traditional_mix()
+{
+	hv1addr1="172.16.0.1"
+	hv2addr1="172.16.0.2"
+	hv1addr2="2002:fee1::1"
+	hv2addr2="2002:fee1::2"
+
+	setup_ns hv_1 hv_2
+	hv[1]=$hv_1
+	hv[2]=$hv_2
+	ip link add veth-hv-1 type veth peer name veth-hv-2
+	setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64
+	setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64
+
+        check_hv_connectivity hv2addr1 hv2addr2
+
+	setup_ns vm_11 vm_21 vm_31 vm_12 vm_22 vm_32
+	vm[11]=$vm_11
+	vm[21]=$vm_21
+	vm[31]=$vm_31
+	vm[12]=$vm_12
+	vm[22]=$vm_22
+	vm[32]=$vm_32
+	setup-vm 1 11 1 10-v4-$hv1addr1-$hv2addr1-vnifilter,20-v4-$hv1addr1-$hv2addr1-vnifilter 0
+	setup-vm 1 21 2 10-v6-$hv1addr2-$hv2addr2-vnifilter,20-v6-$hv1addr2-$hv2addr2-vnifilter 0
+	setup-vm 1 31 3 30-v4-$hv1addr1-$hv2addr1-default-4790,40-v6-$hv1addr2-$hv2addr2-default-4790,50-v4-$hv1addr1-$hv2addr1-metadata-4791 0
+
+
+	setup-vm 2 12 1 10-v4-$hv2addr1-$hv1addr1-vnifilter,20-v4-$hv2addr1-$hv1addr1-vnifilter 0
+	setup-vm 2 22 2 10-v6-$hv2addr2-$hv1addr2-vnifilter,20-v6-$hv2addr2-$hv1addr2-vnifilter 0
+	setup-vm 2 32 3 30-v4-$hv2addr1-$hv1addr1-default-4790,40-v6-$hv2addr2-$hv1addr2-default-4790,50-v4-$hv2addr1-$hv1addr1-metadata-4791 0
+
+        check_vm_connectivity "vnifiltering vxlan pervni remote mix"
+
+	# check VM connectivity over traditional/non-vxlan filtering vxlan devices
+	run_cmd "ip netns exec $vm_31 ping -c 1 -W 1 10.0.30.32"
+        log_test $? 0 "VM connectivity over traditional vxlan (ipv4 default rdst)"
+
+	run_cmd "ip netns exec $vm_31 ping -c 1 -W 1 10.0.40.32"
+        log_test $? 0 "VM connectivity over traditional vxlan (ipv6 default rdst)"
+
+	run_cmd "ip netns exec $vm_31 ping -c 1 -W 1 10.0.50.32"
+        log_test $? 0 "VM connectivity over metadata nonfiltering vxlan (ipv4 default rdst)"
+}
+
+while getopts :t:pP46hv o
+do
+	case $o in
+		t) TESTS=$OPTARG;;
+		p) PAUSE_ON_FAIL=yes;;
+		P) PAUSE=yes;;
+		v) VERBOSE=$(($VERBOSE + 1));;
+		h) usage; exit 0;;
+		*) usage; exit 1;;
+	esac
+done
+
+# make sure we don't pause twice
+[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no
+
+if [ "$(id -u)" -ne 0 ];then
+	echo "SKIP: Need root privileges"
+	exit $ksft_skip;
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+ip link help vxlan 2>&1 | grep -q "vnifilter"
+if [ $? -ne 0 ]; then
+   echo "SKIP: iproute2 too old, missing vxlan dev vnifilter setting"
+   sync
+   exit $ksft_skip
+fi
+
+bridge vni help 2>&1 | grep -q "Usage: bridge vni"
+if [ $? -ne 0 ]; then
+   echo "SKIP: iproute2 bridge lacks vxlan vnifiltering support"
+   exit $ksft_skip
+fi
+
+# start clean
+cleanup &> /dev/null
+
+for t in $TESTS
+do
+	case $t in
+	none) setup; exit 0;;
+	*) $t; cleanup;;
+	esac
+done
+
+if [ "$TESTS" != "none" ]; then
+	printf "\nTests passed: %3d\n" ${nsuccess}
+	printf "Tests failed: %3d\n"   ${nfail}
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/timestamping.c b/tools/testing/selftests/net/timestamping.c
index f4bb4fef0f39..044bc0e9ed81 100644
--- a/tools/testing/selftests/net/timestamping.c
+++ b/tools/testing/selftests/net/timestamping.c
@@ -47,7 +47,7 @@ static void usage(const char *error)
 {
 	if (error)
 		printf("invalid option: %s\n", error);
-	printf("timestamping interface option*\n\n"
+	printf("timestamping <interface> [bind_phc_index] [option]*\n\n"
 	       "Options:\n"
 	       "  IP_MULTICAST_LOOP - looping outgoing multicasts\n"
 	       "  SO_TIMESTAMP - normal software time stamping, ms resolution\n"
@@ -58,8 +58,10 @@ static void usage(const char *error)
 	       "  SOF_TIMESTAMPING_RX_SOFTWARE - software fallback for incoming packets\n"
 	       "  SOF_TIMESTAMPING_SOFTWARE - request reporting of software time stamps\n"
 	       "  SOF_TIMESTAMPING_RAW_HARDWARE - request reporting of raw HW time stamps\n"
+	       "  SOF_TIMESTAMPING_BIND_PHC - request to bind a PHC of PTP vclock\n"
 	       "  SIOCGSTAMP - check last socket time stamp\n"
-	       "  SIOCGSTAMPNS - more accurate socket time stamp\n");
+	       "  SIOCGSTAMPNS - more accurate socket time stamp\n"
+	       "  PTPV2 - use PTPv2 messages\n");
 	exit(1);
 }
 
@@ -115,13 +117,28 @@ static const unsigned char sync[] = {
 	0x00, 0x00, 0x00, 0x00
 };
 
-static void sendpacket(int sock, struct sockaddr *addr, socklen_t addr_len)
+static const unsigned char sync_v2[] = {
+	0x00, 0x02, 0x00, 0x2C,
+	0x00, 0x00, 0x02, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0xFF,
+	0xFE, 0x00, 0x00, 0x00,
+	0x00, 0x01, 0x00, 0x01,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+};
+
+static void sendpacket(int sock, struct sockaddr *addr, socklen_t addr_len, int ptpv2)
 {
+	size_t sync_len = ptpv2 ? sizeof(sync_v2) : sizeof(sync);
+	const void *sync_p = ptpv2 ? sync_v2 : sync;
 	struct timeval now;
 	int res;
 
-	res = sendto(sock, sync, sizeof(sync), 0,
-		addr, addr_len);
+	res = sendto(sock, sync_p, sync_len, 0, addr, addr_len);
 	gettimeofday(&now, 0);
 	if (res < 0)
 		printf("%s: %s\n", "send", strerror(errno));
@@ -134,9 +151,11 @@ static void sendpacket(int sock, struct sockaddr *addr, socklen_t addr_len)
 static void printpacket(struct msghdr *msg, int res,
 			char *data,
 			int sock, int recvmsg_flags,
-			int siocgstamp, int siocgstampns)
+			int siocgstamp, int siocgstampns, int ptpv2)
 {
 	struct sockaddr_in *from_addr = (struct sockaddr_in *)msg->msg_name;
+	size_t sync_len = ptpv2 ? sizeof(sync_v2) : sizeof(sync);
+	const void *sync_p = ptpv2 ? sync_v2 : sync;
 	struct cmsghdr *cmsg;
 	struct timeval tv;
 	struct timespec ts;
@@ -210,10 +229,9 @@ static void printpacket(struct msghdr *msg, int res,
 					"probably SO_EE_ORIGIN_TIMESTAMPING"
 #endif
 					);
-				if (res < sizeof(sync))
+				if (res < sync_len)
 					printf(" => truncated data?!");
-				else if (!memcmp(sync, data + res - sizeof(sync),
-							sizeof(sync)))
+				else if (!memcmp(sync_p, data + res - sync_len, sync_len))
 					printf(" => GOT OUR DATA BACK (HURRAY!)");
 				break;
 			}
@@ -257,7 +275,7 @@ static void printpacket(struct msghdr *msg, int res,
 }
 
 static void recvpacket(int sock, int recvmsg_flags,
-		       int siocgstamp, int siocgstampns)
+		       int siocgstamp, int siocgstampns, int ptpv2)
 {
 	char data[256];
 	struct msghdr msg;
@@ -288,18 +306,18 @@ static void recvpacket(int sock, int recvmsg_flags,
 	} else {
 		printpacket(&msg, res, data,
 			    sock, recvmsg_flags,
-			    siocgstamp, siocgstampns);
+			    siocgstamp, siocgstampns, ptpv2);
 	}
 }
 
 int main(int argc, char **argv)
 {
-	int so_timestamping_flags = 0;
 	int so_timestamp = 0;
 	int so_timestampns = 0;
 	int siocgstamp = 0;
 	int siocgstampns = 0;
 	int ip_multicast_loop = 0;
+	int ptpv2 = 0;
 	char *interface;
 	int i;
 	int enabled = 1;
@@ -307,6 +325,8 @@ int main(int argc, char **argv)
 	struct ifreq device;
 	struct ifreq hwtstamp;
 	struct hwtstamp_config hwconfig, hwconfig_requested;
+	struct so_timestamping so_timestamping_get = { 0, 0 };
+	struct so_timestamping so_timestamping = { 0, 0 };
 	struct sockaddr_in addr;
 	struct ip_mreq imr;
 	struct in_addr iaddr;
@@ -324,7 +344,12 @@ int main(int argc, char **argv)
 		exit(1);
 	}
 
-	for (i = 2; i < argc; i++) {
+	if (argc >= 3 && sscanf(argv[2], "%d", &so_timestamping.bind_phc) == 1)
+		val = 3;
+	else
+		val = 2;
+
+	for (i = val; i < argc; i++) {
 		if (!strcasecmp(argv[i], "SO_TIMESTAMP"))
 			so_timestamp = 1;
 		else if (!strcasecmp(argv[i], "SO_TIMESTAMPNS"))
@@ -335,18 +360,22 @@ int main(int argc, char **argv)
 			siocgstampns = 1;
 		else if (!strcasecmp(argv[i], "IP_MULTICAST_LOOP"))
 			ip_multicast_loop = 1;
+		else if (!strcasecmp(argv[i], "PTPV2"))
+			ptpv2 = 1;
 		else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_HARDWARE"))
-			so_timestamping_flags |= SOF_TIMESTAMPING_TX_HARDWARE;
+			so_timestamping.flags |= SOF_TIMESTAMPING_TX_HARDWARE;
 		else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_SOFTWARE"))
-			so_timestamping_flags |= SOF_TIMESTAMPING_TX_SOFTWARE;
+			so_timestamping.flags |= SOF_TIMESTAMPING_TX_SOFTWARE;
 		else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RX_HARDWARE"))
-			so_timestamping_flags |= SOF_TIMESTAMPING_RX_HARDWARE;
+			so_timestamping.flags |= SOF_TIMESTAMPING_RX_HARDWARE;
 		else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RX_SOFTWARE"))
-			so_timestamping_flags |= SOF_TIMESTAMPING_RX_SOFTWARE;
+			so_timestamping.flags |= SOF_TIMESTAMPING_RX_SOFTWARE;
 		else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_SOFTWARE"))
-			so_timestamping_flags |= SOF_TIMESTAMPING_SOFTWARE;
+			so_timestamping.flags |= SOF_TIMESTAMPING_SOFTWARE;
 		else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RAW_HARDWARE"))
-			so_timestamping_flags |= SOF_TIMESTAMPING_RAW_HARDWARE;
+			so_timestamping.flags |= SOF_TIMESTAMPING_RAW_HARDWARE;
+		else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_BIND_PHC"))
+			so_timestamping.flags |= SOF_TIMESTAMPING_BIND_PHC;
 		else
 			usage(argv[i]);
 	}
@@ -365,10 +394,11 @@ int main(int argc, char **argv)
 	hwtstamp.ifr_data = (void *)&hwconfig;
 	memset(&hwconfig, 0, sizeof(hwconfig));
 	hwconfig.tx_type =
-		(so_timestamping_flags & SOF_TIMESTAMPING_TX_HARDWARE) ?
+		(so_timestamping.flags & SOF_TIMESTAMPING_TX_HARDWARE) ?
 		HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF;
 	hwconfig.rx_filter =
-		(so_timestamping_flags & SOF_TIMESTAMPING_RX_HARDWARE) ?
+		(so_timestamping.flags & SOF_TIMESTAMPING_RX_HARDWARE) ?
+		ptpv2 ? HWTSTAMP_FILTER_PTP_V2_L4_SYNC :
 		HWTSTAMP_FILTER_PTP_V1_L4_SYNC : HWTSTAMP_FILTER_NONE;
 	hwconfig_requested = hwconfig;
 	if (ioctl(sock, SIOCSHWTSTAMP, &hwtstamp) < 0) {
@@ -392,6 +422,9 @@ int main(int argc, char **argv)
 		 sizeof(struct sockaddr_in)) < 0)
 		bail("bind");
 
+	if (setsockopt(sock, SOL_SOCKET, SO_BINDTODEVICE, interface, if_len))
+		bail("bind device");
+
 	/* set multicast group for outgoing packets */
 	inet_aton("224.0.1.130", &iaddr); /* alternate PTP domain 1 */
 	addr.sin_addr = iaddr;
@@ -423,10 +456,9 @@ int main(int argc, char **argv)
 			   &enabled, sizeof(enabled)) < 0)
 		bail("setsockopt SO_TIMESTAMPNS");
 
-	if (so_timestamping_flags &&
-		setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING,
-			   &so_timestamping_flags,
-			   sizeof(so_timestamping_flags)) < 0)
+	if (so_timestamping.flags &&
+	    setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, &so_timestamping,
+		       sizeof(so_timestamping)) < 0)
 		bail("setsockopt SO_TIMESTAMPING");
 
 	/* request IP_PKTINFO for debugging purposes */
@@ -447,14 +479,18 @@ int main(int argc, char **argv)
 	else
 		printf("SO_TIMESTAMPNS %d\n", val);
 
-	if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, &val, &len) < 0) {
+	len = sizeof(so_timestamping_get);
+	if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, &so_timestamping_get,
+		       &len) < 0) {
 		printf("%s: %s\n", "getsockopt SO_TIMESTAMPING",
 		       strerror(errno));
 	} else {
-		printf("SO_TIMESTAMPING %d\n", val);
-		if (val != so_timestamping_flags)
-			printf("   not the expected value %d\n",
-			       so_timestamping_flags);
+		printf("SO_TIMESTAMPING flags %d, bind phc %d\n",
+		       so_timestamping_get.flags, so_timestamping_get.bind_phc);
+		if (so_timestamping_get.flags != so_timestamping.flags ||
+		    so_timestamping_get.bind_phc != so_timestamping.bind_phc)
+			printf("   not expected, flags %d, bind phc %d\n",
+			       so_timestamping.flags, so_timestamping.bind_phc);
 	}
 
 	/* send packets forever every five seconds */
@@ -496,16 +532,16 @@ int main(int argc, char **argv)
 					printf("has error\n");
 				recvpacket(sock, 0,
 					   siocgstamp,
-					   siocgstampns);
+					   siocgstampns, ptpv2);
 				recvpacket(sock, MSG_ERRQUEUE,
 					   siocgstamp,
-					   siocgstampns);
+					   siocgstampns, ptpv2);
 			}
 		} else {
 			/* write one packet */
 			sendpacket(sock,
 				   (struct sockaddr *)&addr,
-				   sizeof(addr));
+				   sizeof(addr), ptpv2);
 			next.tv_sec += 5;
 			continue;
 		}
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index b599f1fa99b5..c6eda21cefb6 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -15,6 +15,7 @@
 #include <linux/tcp.h>
 #include <linux/socket.h>
 
+#include <sys/epoll.h>
 #include <sys/types.h>
 #include <sys/sendfile.h>
 #include <sys/socket.h>
@@ -25,26 +26,101 @@
 #define TLS_PAYLOAD_MAX_LEN 16384
 #define SOL_TLS 282
 
-FIXTURE(tls_basic)
-{
-	int fd, cfd;
-	bool notls;
+static int fips_enabled;
+
+struct tls_crypto_info_keys {
+	union {
+		struct tls_crypto_info crypto_info;
+		struct tls12_crypto_info_aes_gcm_128 aes128;
+		struct tls12_crypto_info_chacha20_poly1305 chacha20;
+		struct tls12_crypto_info_sm4_gcm sm4gcm;
+		struct tls12_crypto_info_sm4_ccm sm4ccm;
+		struct tls12_crypto_info_aes_ccm_128 aesccm128;
+		struct tls12_crypto_info_aes_gcm_256 aesgcm256;
+		struct tls12_crypto_info_aria_gcm_128 ariagcm128;
+		struct tls12_crypto_info_aria_gcm_256 ariagcm256;
+	};
+	size_t len;
 };
 
-FIXTURE_SETUP(tls_basic)
+static void tls_crypto_info_init(uint16_t tls_version, uint16_t cipher_type,
+				 struct tls_crypto_info_keys *tls12)
+{
+	memset(tls12, 0, sizeof(*tls12));
+
+	switch (cipher_type) {
+	case TLS_CIPHER_CHACHA20_POLY1305:
+		tls12->len = sizeof(struct tls12_crypto_info_chacha20_poly1305);
+		tls12->chacha20.info.version = tls_version;
+		tls12->chacha20.info.cipher_type = cipher_type;
+		break;
+	case TLS_CIPHER_AES_GCM_128:
+		tls12->len = sizeof(struct tls12_crypto_info_aes_gcm_128);
+		tls12->aes128.info.version = tls_version;
+		tls12->aes128.info.cipher_type = cipher_type;
+		break;
+	case TLS_CIPHER_SM4_GCM:
+		tls12->len = sizeof(struct tls12_crypto_info_sm4_gcm);
+		tls12->sm4gcm.info.version = tls_version;
+		tls12->sm4gcm.info.cipher_type = cipher_type;
+		break;
+	case TLS_CIPHER_SM4_CCM:
+		tls12->len = sizeof(struct tls12_crypto_info_sm4_ccm);
+		tls12->sm4ccm.info.version = tls_version;
+		tls12->sm4ccm.info.cipher_type = cipher_type;
+		break;
+	case TLS_CIPHER_AES_CCM_128:
+		tls12->len = sizeof(struct tls12_crypto_info_aes_ccm_128);
+		tls12->aesccm128.info.version = tls_version;
+		tls12->aesccm128.info.cipher_type = cipher_type;
+		break;
+	case TLS_CIPHER_AES_GCM_256:
+		tls12->len = sizeof(struct tls12_crypto_info_aes_gcm_256);
+		tls12->aesgcm256.info.version = tls_version;
+		tls12->aesgcm256.info.cipher_type = cipher_type;
+		break;
+	case TLS_CIPHER_ARIA_GCM_128:
+		tls12->len = sizeof(struct tls12_crypto_info_aria_gcm_128);
+		tls12->ariagcm128.info.version = tls_version;
+		tls12->ariagcm128.info.cipher_type = cipher_type;
+		break;
+	case TLS_CIPHER_ARIA_GCM_256:
+		tls12->len = sizeof(struct tls12_crypto_info_aria_gcm_256);
+		tls12->ariagcm256.info.version = tls_version;
+		tls12->ariagcm256.info.cipher_type = cipher_type;
+		break;
+	default:
+		break;
+	}
+}
+
+static void memrnd(void *s, size_t n)
+{
+	int *dword = s;
+	char *byte;
+
+	for (; n >= 4; n -= 4)
+		*dword++ = rand();
+	byte = (void *)dword;
+	while (n--)
+		*byte++ = rand();
+}
+
+static void ulp_sock_pair(struct __test_metadata *_metadata,
+			  int *fd, int *cfd, bool *notls)
 {
 	struct sockaddr_in addr;
 	socklen_t len;
 	int sfd, ret;
 
-	self->notls = false;
+	*notls = false;
 	len = sizeof(addr);
 
 	addr.sin_family = AF_INET;
 	addr.sin_addr.s_addr = htonl(INADDR_ANY);
 	addr.sin_port = 0;
 
-	self->fd = socket(AF_INET, SOCK_STREAM, 0);
+	*fd = socket(AF_INET, SOCK_STREAM, 0);
 	sfd = socket(AF_INET, SOCK_STREAM, 0);
 
 	ret = bind(sfd, &addr, sizeof(addr));
@@ -55,26 +131,96 @@ FIXTURE_SETUP(tls_basic)
 	ret = getsockname(sfd, &addr, &len);
 	ASSERT_EQ(ret, 0);
 
-	ret = connect(self->fd, &addr, sizeof(addr));
+	ret = connect(*fd, &addr, sizeof(addr));
 	ASSERT_EQ(ret, 0);
 
-	self->cfd = accept(sfd, &addr, &len);
-	ASSERT_GE(self->cfd, 0);
+	*cfd = accept(sfd, &addr, &len);
+	ASSERT_GE(*cfd, 0);
 
 	close(sfd);
 
-	ret = setsockopt(self->fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
+	ret = setsockopt(*fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
 	if (ret != 0) {
 		ASSERT_EQ(errno, ENOENT);
-		self->notls = true;
+		*notls = true;
 		printf("Failure setting TCP_ULP, testing without tls\n");
 		return;
 	}
 
-	ret = setsockopt(self->cfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
+	ret = setsockopt(*cfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
 	ASSERT_EQ(ret, 0);
 }
 
+/* Produce a basic cmsg */
+static int tls_send_cmsg(int fd, unsigned char record_type,
+			 void *data, size_t len, int flags)
+{
+	char cbuf[CMSG_SPACE(sizeof(char))];
+	int cmsg_len = sizeof(char);
+	struct cmsghdr *cmsg;
+	struct msghdr msg;
+	struct iovec vec;
+
+	vec.iov_base = data;
+	vec.iov_len = len;
+	memset(&msg, 0, sizeof(struct msghdr));
+	msg.msg_iov = &vec;
+	msg.msg_iovlen = 1;
+	msg.msg_control = cbuf;
+	msg.msg_controllen = sizeof(cbuf);
+	cmsg = CMSG_FIRSTHDR(&msg);
+	cmsg->cmsg_level = SOL_TLS;
+	/* test sending non-record types. */
+	cmsg->cmsg_type = TLS_SET_RECORD_TYPE;
+	cmsg->cmsg_len = CMSG_LEN(cmsg_len);
+	*CMSG_DATA(cmsg) = record_type;
+	msg.msg_controllen = cmsg->cmsg_len;
+
+	return sendmsg(fd, &msg, flags);
+}
+
+static int tls_recv_cmsg(struct __test_metadata *_metadata,
+			 int fd, unsigned char record_type,
+			 void *data, size_t len, int flags)
+{
+	char cbuf[CMSG_SPACE(sizeof(char))];
+	struct cmsghdr *cmsg;
+	unsigned char ctype;
+	struct msghdr msg;
+	struct iovec vec;
+	int n;
+
+	vec.iov_base = data;
+	vec.iov_len = len;
+	memset(&msg, 0, sizeof(struct msghdr));
+	msg.msg_iov = &vec;
+	msg.msg_iovlen = 1;
+	msg.msg_control = cbuf;
+	msg.msg_controllen = sizeof(cbuf);
+
+	n = recvmsg(fd, &msg, flags);
+
+	cmsg = CMSG_FIRSTHDR(&msg);
+	EXPECT_NE(cmsg, NULL);
+	EXPECT_EQ(cmsg->cmsg_level, SOL_TLS);
+	EXPECT_EQ(cmsg->cmsg_type, TLS_GET_RECORD_TYPE);
+	ctype = *((unsigned char *)CMSG_DATA(cmsg));
+	EXPECT_EQ(ctype, record_type);
+
+	return n;
+}
+
+FIXTURE(tls_basic)
+{
+	int fd, cfd;
+	bool notls;
+};
+
+FIXTURE_SETUP(tls_basic)
+{
+	ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls);
+}
+
 FIXTURE_TEARDOWN(tls_basic)
 {
 	close(self->fd);
@@ -95,6 +241,31 @@ TEST_F(tls_basic, base_base)
 	EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
 };
 
+TEST_F(tls_basic, bad_cipher)
+{
+	struct tls_crypto_info_keys tls12;
+
+	tls12.crypto_info.version = 200;
+	tls12.crypto_info.cipher_type = TLS_CIPHER_AES_GCM_128;
+	EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, sizeof(struct tls12_crypto_info_aes_gcm_128)), -1);
+
+	tls12.crypto_info.version = TLS_1_2_VERSION;
+	tls12.crypto_info.cipher_type = 50;
+	EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, sizeof(struct tls12_crypto_info_aes_gcm_128)), -1);
+
+	tls12.crypto_info.version = TLS_1_2_VERSION;
+	tls12.crypto_info.cipher_type = 59;
+	EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, sizeof(struct tls12_crypto_info_aes_gcm_128)), -1);
+
+	tls12.crypto_info.version = TLS_1_2_VERSION;
+	tls12.crypto_info.cipher_type = 10;
+	EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, sizeof(struct tls12_crypto_info_aes_gcm_128)), -1);
+
+	tls12.crypto_info.version = TLS_1_2_VERSION;
+	tls12.crypto_info.cipher_type = 70;
+	EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, sizeof(struct tls12_crypto_info_aes_gcm_128)), -1);
+}
+
 FIXTURE(tls)
 {
 	int fd, cfd;
@@ -103,77 +274,122 @@ FIXTURE(tls)
 
 FIXTURE_VARIANT(tls)
 {
-	unsigned int tls_version;
+	uint16_t tls_version;
+	uint16_t cipher_type;
+	bool nopad, fips_non_compliant;
 };
 
-FIXTURE_VARIANT_ADD(tls, 12)
+FIXTURE_VARIANT_ADD(tls, 12_aes_gcm)
 {
 	.tls_version = TLS_1_2_VERSION,
+	.cipher_type = TLS_CIPHER_AES_GCM_128,
 };
 
-FIXTURE_VARIANT_ADD(tls, 13)
+FIXTURE_VARIANT_ADD(tls, 13_aes_gcm)
 {
 	.tls_version = TLS_1_3_VERSION,
+	.cipher_type = TLS_CIPHER_AES_GCM_128,
 };
 
-FIXTURE_SETUP(tls)
+FIXTURE_VARIANT_ADD(tls, 12_chacha)
 {
-	struct tls12_crypto_info_aes_gcm_128 tls12;
-	struct sockaddr_in addr;
-	socklen_t len;
-	int sfd, ret;
+	.tls_version = TLS_1_2_VERSION,
+	.cipher_type = TLS_CIPHER_CHACHA20_POLY1305,
+	.fips_non_compliant = true,
+};
 
-	self->notls = false;
-	len = sizeof(addr);
+FIXTURE_VARIANT_ADD(tls, 13_chacha)
+{
+	.tls_version = TLS_1_3_VERSION,
+	.cipher_type = TLS_CIPHER_CHACHA20_POLY1305,
+	.fips_non_compliant = true,
+};
 
-	memset(&tls12, 0, sizeof(tls12));
-	tls12.info.version = variant->tls_version;
-	tls12.info.cipher_type = TLS_CIPHER_AES_GCM_128;
+FIXTURE_VARIANT_ADD(tls, 13_sm4_gcm)
+{
+	.tls_version = TLS_1_3_VERSION,
+	.cipher_type = TLS_CIPHER_SM4_GCM,
+	.fips_non_compliant = true,
+};
 
-	addr.sin_family = AF_INET;
-	addr.sin_addr.s_addr = htonl(INADDR_ANY);
-	addr.sin_port = 0;
+FIXTURE_VARIANT_ADD(tls, 13_sm4_ccm)
+{
+	.tls_version = TLS_1_3_VERSION,
+	.cipher_type = TLS_CIPHER_SM4_CCM,
+	.fips_non_compliant = true,
+};
 
-	self->fd = socket(AF_INET, SOCK_STREAM, 0);
-	sfd = socket(AF_INET, SOCK_STREAM, 0);
+FIXTURE_VARIANT_ADD(tls, 12_aes_ccm)
+{
+	.tls_version = TLS_1_2_VERSION,
+	.cipher_type = TLS_CIPHER_AES_CCM_128,
+};
 
-	ret = bind(sfd, &addr, sizeof(addr));
-	ASSERT_EQ(ret, 0);
-	ret = listen(sfd, 10);
-	ASSERT_EQ(ret, 0);
+FIXTURE_VARIANT_ADD(tls, 13_aes_ccm)
+{
+	.tls_version = TLS_1_3_VERSION,
+	.cipher_type = TLS_CIPHER_AES_CCM_128,
+};
 
-	ret = getsockname(sfd, &addr, &len);
-	ASSERT_EQ(ret, 0);
+FIXTURE_VARIANT_ADD(tls, 12_aes_gcm_256)
+{
+	.tls_version = TLS_1_2_VERSION,
+	.cipher_type = TLS_CIPHER_AES_GCM_256,
+};
 
-	ret = connect(self->fd, &addr, sizeof(addr));
-	ASSERT_EQ(ret, 0);
+FIXTURE_VARIANT_ADD(tls, 13_aes_gcm_256)
+{
+	.tls_version = TLS_1_3_VERSION,
+	.cipher_type = TLS_CIPHER_AES_GCM_256,
+};
 
-	ret = setsockopt(self->fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
-	if (ret != 0) {
-		self->notls = true;
-		printf("Failure setting TCP_ULP, testing without tls\n");
-	}
+FIXTURE_VARIANT_ADD(tls, 13_nopad)
+{
+	.tls_version = TLS_1_3_VERSION,
+	.cipher_type = TLS_CIPHER_AES_GCM_128,
+	.nopad = true,
+};
 
-	if (!self->notls) {
-		ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12,
-				 sizeof(tls12));
-		ASSERT_EQ(ret, 0);
-	}
+FIXTURE_VARIANT_ADD(tls, 12_aria_gcm)
+{
+	.tls_version = TLS_1_2_VERSION,
+	.cipher_type = TLS_CIPHER_ARIA_GCM_128,
+};
 
-	self->cfd = accept(sfd, &addr, &len);
-	ASSERT_GE(self->cfd, 0);
+FIXTURE_VARIANT_ADD(tls, 12_aria_gcm_256)
+{
+	.tls_version = TLS_1_2_VERSION,
+	.cipher_type = TLS_CIPHER_ARIA_GCM_256,
+};
 
-	if (!self->notls) {
-		ret = setsockopt(self->cfd, IPPROTO_TCP, TCP_ULP, "tls",
-				 sizeof("tls"));
-		ASSERT_EQ(ret, 0);
+FIXTURE_SETUP(tls)
+{
+	struct tls_crypto_info_keys tls12;
+	int one = 1;
+	int ret;
 
-		ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12,
-				 sizeof(tls12));
+	if (fips_enabled && variant->fips_non_compliant)
+		SKIP(return, "Unsupported cipher in FIPS mode");
+
+	tls_crypto_info_init(variant->tls_version, variant->cipher_type,
+			     &tls12);
+
+	ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls);
+
+	if (self->notls)
+		return;
+
+	ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len);
+	ASSERT_EQ(ret, 0);
+
+	ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len);
+	ASSERT_EQ(ret, 0);
+
+	if (variant->nopad) {
+		ret = setsockopt(self->cfd, SOL_TLS, TLS_RX_EXPECT_NO_PAD,
+				 (void *)&one, sizeof(one));
 		ASSERT_EQ(ret, 0);
 	}
-
-	close(sfd);
 }
 
 FIXTURE_TEARDOWN(tls)
@@ -277,6 +493,8 @@ TEST_F(tls, recv_max)
 	char recv_mem[TLS_PAYLOAD_MAX_LEN];
 	char buf[TLS_PAYLOAD_MAX_LEN];
 
+	memrnd(buf, sizeof(buf));
+
 	EXPECT_GE(send(self->fd, buf, send_len, 0), 0);
 	EXPECT_NE(recv(self->cfd, recv_mem, send_len, 0), -1);
 	EXPECT_EQ(memcmp(buf, recv_mem, send_len), 0);
@@ -318,6 +536,17 @@ TEST_F(tls, msg_more_unsent)
 	EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_DONTWAIT), -1);
 }
 
+TEST_F(tls, msg_eor)
+{
+	char const *test_str = "test_read";
+	int send_len = 10;
+	char buf[10];
+
+	EXPECT_EQ(send(self->fd, test_str, send_len, MSG_EOR), send_len);
+	EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_WAITALL), send_len);
+	EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
+}
+
 TEST_F(tls, sendmsg_single)
 {
 	struct msghdr msg;
@@ -384,11 +613,12 @@ TEST_F(tls, sendmsg_large)
 
 		msg.msg_iov = &vec;
 		msg.msg_iovlen = 1;
-		EXPECT_EQ(sendmsg(self->cfd, &msg, 0), send_len);
+		EXPECT_EQ(sendmsg(self->fd, &msg, 0), send_len);
 	}
 
-	while (recvs++ < sends)
-		EXPECT_NE(recv(self->fd, mem, send_len, 0), -1);
+	while (recvs++ < sends) {
+		EXPECT_NE(recv(self->cfd, mem, send_len, 0), -1);
+	}
 
 	free(mem);
 }
@@ -416,9 +646,9 @@ TEST_F(tls, sendmsg_multiple)
 	msg.msg_iov = vec;
 	msg.msg_iovlen = iov_len;
 
-	EXPECT_EQ(sendmsg(self->cfd, &msg, 0), total_len);
+	EXPECT_EQ(sendmsg(self->fd, &msg, 0), total_len);
 	buf = malloc(total_len);
-	EXPECT_NE(recv(self->fd, buf, total_len, 0), -1);
+	EXPECT_NE(recv(self->cfd, buf, total_len, 0), -1);
 	for (i = 0; i < iov_len; i++) {
 		EXPECT_EQ(memcmp(test_strs[i], buf + len_cmp,
 				 strlen(test_strs[i])),
@@ -477,6 +707,20 @@ TEST_F(tls, splice_from_pipe)
 	EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
 }
 
+TEST_F(tls, splice_more)
+{
+	unsigned int f = SPLICE_F_NONBLOCK | SPLICE_F_MORE | SPLICE_F_GIFT;
+	int send_len = TLS_PAYLOAD_MAX_LEN;
+	char mem_send[TLS_PAYLOAD_MAX_LEN];
+	int i, send_pipe = 1;
+	int p[2];
+
+	ASSERT_GE(pipe(p), 0);
+	EXPECT_GE(write(p[1], mem_send, send_len), 0);
+	for (i = 0; i < 32; i++)
+		EXPECT_EQ(splice(p[0], NULL, self->fd, NULL, send_pipe, f), 1);
+}
+
 TEST_F(tls, splice_from_pipe2)
 {
 	int send_len = 16000;
@@ -485,12 +729,14 @@ TEST_F(tls, splice_from_pipe2)
 	int p2[2];
 	int p[2];
 
+	memrnd(mem_send, sizeof(mem_send));
+
 	ASSERT_GE(pipe(p), 0);
 	ASSERT_GE(pipe(p2), 0);
-	EXPECT_GE(write(p[1], mem_send, 8000), 0);
-	EXPECT_GE(splice(p[0], NULL, self->fd, NULL, 8000, 0), 0);
-	EXPECT_GE(write(p2[1], mem_send + 8000, 8000), 0);
-	EXPECT_GE(splice(p2[0], NULL, self->fd, NULL, 8000, 0), 0);
+	EXPECT_EQ(write(p[1], mem_send, 8000), 8000);
+	EXPECT_EQ(splice(p[0], NULL, self->fd, NULL, 8000, 0), 8000);
+	EXPECT_EQ(write(p2[1], mem_send + 8000, 8000), 8000);
+	EXPECT_EQ(splice(p2[0], NULL, self->fd, NULL, 8000, 0), 8000);
 	EXPECT_EQ(recv(self->cfd, mem_recv, send_len, MSG_WAITALL), send_len);
 	EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
 }
@@ -524,10 +770,107 @@ TEST_F(tls, splice_to_pipe)
 	char mem_recv[TLS_PAYLOAD_MAX_LEN];
 	int p[2];
 
+	memrnd(mem_send, sizeof(mem_send));
+
+	ASSERT_GE(pipe(p), 0);
+	EXPECT_EQ(send(self->fd, mem_send, send_len, 0), send_len);
+	EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, send_len, 0), send_len);
+	EXPECT_EQ(read(p[0], mem_recv, send_len), send_len);
+	EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
+}
+
+TEST_F(tls, splice_cmsg_to_pipe)
+{
+	char *test_str = "test_read";
+	char record_type = 100;
+	int send_len = 10;
+	char buf[10];
+	int p[2];
+
+	if (self->notls)
+		SKIP(return, "no TLS support");
+
+	ASSERT_GE(pipe(p), 0);
+	EXPECT_EQ(tls_send_cmsg(self->fd, 100, test_str, send_len, 0), 10);
+	EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, send_len, 0), -1);
+	EXPECT_EQ(errno, EINVAL);
+	EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1);
+	EXPECT_EQ(errno, EIO);
+	EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type,
+				buf, sizeof(buf), MSG_WAITALL),
+		  send_len);
+	EXPECT_EQ(memcmp(test_str, buf, send_len), 0);
+}
+
+TEST_F(tls, splice_dec_cmsg_to_pipe)
+{
+	char *test_str = "test_read";
+	char record_type = 100;
+	int send_len = 10;
+	char buf[10];
+	int p[2];
+
+	if (self->notls)
+		SKIP(return, "no TLS support");
+
+	ASSERT_GE(pipe(p), 0);
+	EXPECT_EQ(tls_send_cmsg(self->fd, 100, test_str, send_len, 0), 10);
+	EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1);
+	EXPECT_EQ(errno, EIO);
+	EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, send_len, 0), -1);
+	EXPECT_EQ(errno, EINVAL);
+	EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type,
+				buf, sizeof(buf), MSG_WAITALL),
+		  send_len);
+	EXPECT_EQ(memcmp(test_str, buf, send_len), 0);
+}
+
+TEST_F(tls, recv_and_splice)
+{
+	int send_len = TLS_PAYLOAD_MAX_LEN;
+	char mem_send[TLS_PAYLOAD_MAX_LEN];
+	char mem_recv[TLS_PAYLOAD_MAX_LEN];
+	int half = send_len / 2;
+	int p[2];
+
+	ASSERT_GE(pipe(p), 0);
+	EXPECT_EQ(send(self->fd, mem_send, send_len, 0), send_len);
+	/* Recv hald of the record, splice the other half */
+	EXPECT_EQ(recv(self->cfd, mem_recv, half, MSG_WAITALL), half);
+	EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, half, SPLICE_F_NONBLOCK),
+		  half);
+	EXPECT_EQ(read(p[0], &mem_recv[half], half), half);
+	EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
+}
+
+TEST_F(tls, peek_and_splice)
+{
+	int send_len = TLS_PAYLOAD_MAX_LEN;
+	char mem_send[TLS_PAYLOAD_MAX_LEN];
+	char mem_recv[TLS_PAYLOAD_MAX_LEN];
+	int chunk = TLS_PAYLOAD_MAX_LEN / 4;
+	int n, i, p[2];
+
+	memrnd(mem_send, sizeof(mem_send));
+
 	ASSERT_GE(pipe(p), 0);
-	EXPECT_GE(send(self->fd, mem_send, send_len, 0), 0);
-	EXPECT_GE(splice(self->cfd, NULL, p[1], NULL, send_len, 0), 0);
-	EXPECT_GE(read(p[0], mem_recv, send_len), 0);
+	for (i = 0; i < 4; i++)
+		EXPECT_EQ(send(self->fd, &mem_send[chunk * i], chunk, 0),
+			  chunk);
+
+	EXPECT_EQ(recv(self->cfd, mem_recv, chunk * 5 / 2,
+		       MSG_WAITALL | MSG_PEEK),
+		  chunk * 5 / 2);
+	EXPECT_EQ(memcmp(mem_send, mem_recv, chunk * 5 / 2), 0);
+
+	n = 0;
+	while (n < send_len) {
+		i = splice(self->cfd, NULL, p[1], NULL, send_len - n, 0);
+		EXPECT_GT(i, 0);
+		n += i;
+	}
+	EXPECT_EQ(n, send_len);
+	EXPECT_EQ(read(p[0], mem_recv, send_len), send_len);
 	EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
 }
 
@@ -557,6 +900,8 @@ TEST_F(tls, recvmsg_single_max)
 	struct iovec vec;
 	struct msghdr hdr;
 
+	memrnd(send_mem, sizeof(send_mem));
+
 	EXPECT_EQ(send(self->fd, send_mem, send_len, 0), send_len);
 	vec.iov_base = (char *)recv_mem;
 	vec.iov_len = TLS_PAYLOAD_MAX_LEN;
@@ -570,7 +915,6 @@ TEST_F(tls, recvmsg_single_max)
 TEST_F(tls, recvmsg_multiple)
 {
 	unsigned int msg_iovlen = 1024;
-	unsigned int len_compared = 0;
 	struct iovec vec[1024];
 	char *iov_base[1024];
 	unsigned int iov_len = 16;
@@ -579,6 +923,8 @@ TEST_F(tls, recvmsg_multiple)
 	struct msghdr hdr;
 	int i;
 
+	memrnd(buf, sizeof(buf));
+
 	EXPECT_EQ(send(self->fd, buf, send_len, 0), send_len);
 	for (i = 0; i < msg_iovlen; i++) {
 		iov_base[i] = (char *)malloc(iov_len);
@@ -589,8 +935,6 @@ TEST_F(tls, recvmsg_multiple)
 	hdr.msg_iovlen = msg_iovlen;
 	hdr.msg_iov = vec;
 	EXPECT_NE(recvmsg(self->cfd, &hdr, 0), -1);
-	for (i = 0; i < msg_iovlen; i++)
-		len_compared += iov_len;
 
 	for (i = 0; i < msg_iovlen; i++)
 		free(iov_base[i]);
@@ -603,6 +947,8 @@ TEST_F(tls, single_send_multiple_recv)
 	char send_mem[TLS_PAYLOAD_MAX_LEN * 2];
 	char recv_mem[TLS_PAYLOAD_MAX_LEN * 2];
 
+	memrnd(send_mem, sizeof(send_mem));
+
 	EXPECT_GE(send(self->fd, send_mem, total_len, 0), 0);
 	memset(recv_mem, 0, total_len);
 
@@ -618,6 +964,8 @@ TEST_F(tls, multiple_send_single_recv)
 	char recv_mem[2 * 10];
 	char send_mem[10];
 
+	memrnd(send_mem, sizeof(send_mem));
+
 	EXPECT_GE(send(self->fd, send_mem, send_len, 0), 0);
 	EXPECT_GE(send(self->fd, send_mem, send_len, 0), 0);
 	memset(recv_mem, 0, total_len);
@@ -634,6 +982,8 @@ TEST_F(tls, single_send_multiple_recv_non_align)
 	char recv_mem[recv_len * 2];
 	char send_mem[total_len];
 
+	memrnd(send_mem, sizeof(send_mem));
+
 	EXPECT_GE(send(self->fd, send_mem, total_len, 0), 0);
 	memset(recv_mem, 0, total_len);
 
@@ -652,12 +1002,12 @@ TEST_F(tls, recv_partial)
 
 	memset(recv_mem, 0, sizeof(recv_mem));
 	EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
-	EXPECT_NE(recv(self->cfd, recv_mem, strlen(test_str_first),
-		       MSG_WAITALL), -1);
+	EXPECT_EQ(recv(self->cfd, recv_mem, strlen(test_str_first),
+		       MSG_WAITALL), strlen(test_str_first));
 	EXPECT_EQ(memcmp(test_str_first, recv_mem, strlen(test_str_first)), 0);
 	memset(recv_mem, 0, sizeof(recv_mem));
-	EXPECT_NE(recv(self->cfd, recv_mem, strlen(test_str_second),
-		       MSG_WAITALL), -1);
+	EXPECT_EQ(recv(self->cfd, recv_mem, strlen(test_str_second),
+		       MSG_WAITALL), strlen(test_str_second));
 	EXPECT_EQ(memcmp(test_str_second, recv_mem, strlen(test_str_second)),
 		  0);
 }
@@ -679,10 +1029,10 @@ TEST_F(tls, recv_peek)
 	char buf[15];
 
 	EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
-	EXPECT_NE(recv(self->cfd, buf, send_len, MSG_PEEK), -1);
+	EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_PEEK), send_len);
 	EXPECT_EQ(memcmp(test_str, buf, send_len), 0);
 	memset(buf, 0, sizeof(buf));
-	EXPECT_NE(recv(self->cfd, buf, send_len, 0), -1);
+	EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len);
 	EXPECT_EQ(memcmp(test_str, buf, send_len), 0);
 }
 
@@ -803,18 +1153,17 @@ TEST_F(tls, bidir)
 	int ret;
 
 	if (!self->notls) {
-		struct tls12_crypto_info_aes_gcm_128 tls12;
+		struct tls_crypto_info_keys tls12;
 
-		memset(&tls12, 0, sizeof(tls12));
-		tls12.info.version = variant->tls_version;
-		tls12.info.cipher_type = TLS_CIPHER_AES_GCM_128;
+		tls_crypto_info_init(variant->tls_version, variant->cipher_type,
+				     &tls12);
 
 		ret = setsockopt(self->fd, SOL_TLS, TLS_RX, &tls12,
-				 sizeof(tls12));
+				 tls12.len);
 		ASSERT_EQ(ret, 0);
 
 		ret = setsockopt(self->cfd, SOL_TLS, TLS_TX, &tls12,
-				 sizeof(tls12));
+				 tls12.len);
 		ASSERT_EQ(ret, 0);
 	}
 
@@ -1109,63 +1458,78 @@ TEST_F(tls, mutliproc_sendpage_writers)
 
 TEST_F(tls, control_msg)
 {
-	if (self->notls)
-		return;
-
-	char cbuf[CMSG_SPACE(sizeof(char))];
-	char const *test_str = "test_read";
-	int cmsg_len = sizeof(char);
+	char *test_str = "test_read";
 	char record_type = 100;
-	struct cmsghdr *cmsg;
-	struct msghdr msg;
 	int send_len = 10;
-	struct iovec vec;
 	char buf[10];
 
-	vec.iov_base = (char *)test_str;
-	vec.iov_len = 10;
-	memset(&msg, 0, sizeof(struct msghdr));
-	msg.msg_iov = &vec;
-	msg.msg_iovlen = 1;
-	msg.msg_control = cbuf;
-	msg.msg_controllen = sizeof(cbuf);
-	cmsg = CMSG_FIRSTHDR(&msg);
-	cmsg->cmsg_level = SOL_TLS;
-	/* test sending non-record types. */
-	cmsg->cmsg_type = TLS_SET_RECORD_TYPE;
-	cmsg->cmsg_len = CMSG_LEN(cmsg_len);
-	*CMSG_DATA(cmsg) = record_type;
-	msg.msg_controllen = cmsg->cmsg_len;
+	if (self->notls)
+		SKIP(return, "no TLS support");
 
-	EXPECT_EQ(sendmsg(self->fd, &msg, 0), send_len);
+	EXPECT_EQ(tls_send_cmsg(self->fd, record_type, test_str, send_len, 0),
+		  send_len);
 	/* Should fail because we didn't provide a control message */
 	EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1);
 
-	vec.iov_base = buf;
-	EXPECT_EQ(recvmsg(self->cfd, &msg, MSG_WAITALL | MSG_PEEK), send_len);
-
-	cmsg = CMSG_FIRSTHDR(&msg);
-	EXPECT_NE(cmsg, NULL);
-	EXPECT_EQ(cmsg->cmsg_level, SOL_TLS);
-	EXPECT_EQ(cmsg->cmsg_type, TLS_GET_RECORD_TYPE);
-	record_type = *((unsigned char *)CMSG_DATA(cmsg));
-	EXPECT_EQ(record_type, 100);
+	EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type,
+				buf, sizeof(buf), MSG_WAITALL | MSG_PEEK),
+		  send_len);
 	EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
 
 	/* Recv the message again without MSG_PEEK */
-	record_type = 0;
 	memset(buf, 0, sizeof(buf));
 
-	EXPECT_EQ(recvmsg(self->cfd, &msg, MSG_WAITALL), send_len);
-	cmsg = CMSG_FIRSTHDR(&msg);
-	EXPECT_NE(cmsg, NULL);
-	EXPECT_EQ(cmsg->cmsg_level, SOL_TLS);
-	EXPECT_EQ(cmsg->cmsg_type, TLS_GET_RECORD_TYPE);
-	record_type = *((unsigned char *)CMSG_DATA(cmsg));
-	EXPECT_EQ(record_type, 100);
+	EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type,
+				buf, sizeof(buf), MSG_WAITALL),
+		  send_len);
 	EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
 }
 
+TEST_F(tls, control_msg_nomerge)
+{
+	char *rec1 = "1111";
+	char *rec2 = "2222";
+	int send_len = 5;
+	char buf[15];
+
+	if (self->notls)
+		SKIP(return, "no TLS support");
+
+	EXPECT_EQ(tls_send_cmsg(self->fd, 100, rec1, send_len, 0), send_len);
+	EXPECT_EQ(tls_send_cmsg(self->fd, 100, rec2, send_len, 0), send_len);
+
+	EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), MSG_PEEK), send_len);
+	EXPECT_EQ(memcmp(buf, rec1, send_len), 0);
+
+	EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), MSG_PEEK), send_len);
+	EXPECT_EQ(memcmp(buf, rec1, send_len), 0);
+
+	EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), 0), send_len);
+	EXPECT_EQ(memcmp(buf, rec1, send_len), 0);
+
+	EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), 0), send_len);
+	EXPECT_EQ(memcmp(buf, rec2, send_len), 0);
+}
+
+TEST_F(tls, data_control_data)
+{
+	char *rec1 = "1111";
+	char *rec2 = "2222";
+	char *rec3 = "3333";
+	int send_len = 5;
+	char buf[15];
+
+	if (self->notls)
+		SKIP(return, "no TLS support");
+
+	EXPECT_EQ(send(self->fd, rec1, send_len, 0), send_len);
+	EXPECT_EQ(tls_send_cmsg(self->fd, 100, rec2, send_len, 0), send_len);
+	EXPECT_EQ(send(self->fd, rec3, send_len, 0), send_len);
+
+	EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_PEEK), send_len);
+	EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_PEEK), send_len);
+}
+
 TEST_F(tls, shutdown)
 {
 	char const *test_str = "test_read";
@@ -1217,6 +1581,356 @@ TEST_F(tls, shutdown_reuse)
 	EXPECT_EQ(errno, EISCONN);
 }
 
+TEST_F(tls, getsockopt)
+{
+	struct tls_crypto_info_keys expect, get;
+	socklen_t len;
+
+	/* get only the version/cipher */
+	len = sizeof(struct tls_crypto_info);
+	memrnd(&get, sizeof(get));
+	EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &get, &len), 0);
+	EXPECT_EQ(len, sizeof(struct tls_crypto_info));
+	EXPECT_EQ(get.crypto_info.version, variant->tls_version);
+	EXPECT_EQ(get.crypto_info.cipher_type, variant->cipher_type);
+
+	/* get the full crypto_info */
+	tls_crypto_info_init(variant->tls_version, variant->cipher_type, &expect);
+	len = expect.len;
+	memrnd(&get, sizeof(get));
+	EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &get, &len), 0);
+	EXPECT_EQ(len, expect.len);
+	EXPECT_EQ(get.crypto_info.version, variant->tls_version);
+	EXPECT_EQ(get.crypto_info.cipher_type, variant->cipher_type);
+	EXPECT_EQ(memcmp(&get, &expect, expect.len), 0);
+
+	/* short get should fail */
+	len = sizeof(struct tls_crypto_info) - 1;
+	EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &get, &len), -1);
+	EXPECT_EQ(errno, EINVAL);
+
+	/* partial get of the cipher data should fail */
+	len = expect.len - 1;
+	EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &get, &len), -1);
+	EXPECT_EQ(errno, EINVAL);
+}
+
+FIXTURE(tls_err)
+{
+	int fd, cfd;
+	int fd2, cfd2;
+	bool notls;
+};
+
+FIXTURE_VARIANT(tls_err)
+{
+	uint16_t tls_version;
+};
+
+FIXTURE_VARIANT_ADD(tls_err, 12_aes_gcm)
+{
+	.tls_version = TLS_1_2_VERSION,
+};
+
+FIXTURE_VARIANT_ADD(tls_err, 13_aes_gcm)
+{
+	.tls_version = TLS_1_3_VERSION,
+};
+
+FIXTURE_SETUP(tls_err)
+{
+	struct tls_crypto_info_keys tls12;
+	int ret;
+
+	tls_crypto_info_init(variant->tls_version, TLS_CIPHER_AES_GCM_128,
+			     &tls12);
+
+	ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls);
+	ulp_sock_pair(_metadata, &self->fd2, &self->cfd2, &self->notls);
+	if (self->notls)
+		return;
+
+	ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len);
+	ASSERT_EQ(ret, 0);
+
+	ret = setsockopt(self->cfd2, SOL_TLS, TLS_RX, &tls12, tls12.len);
+	ASSERT_EQ(ret, 0);
+}
+
+FIXTURE_TEARDOWN(tls_err)
+{
+	close(self->fd);
+	close(self->cfd);
+	close(self->fd2);
+	close(self->cfd2);
+}
+
+TEST_F(tls_err, bad_rec)
+{
+	char buf[64];
+
+	if (self->notls)
+		SKIP(return, "no TLS support");
+
+	memset(buf, 0x55, sizeof(buf));
+	EXPECT_EQ(send(self->fd2, buf, sizeof(buf), 0), sizeof(buf));
+	EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
+	EXPECT_EQ(errno, EMSGSIZE);
+	EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), MSG_DONTWAIT), -1);
+	EXPECT_EQ(errno, EAGAIN);
+}
+
+TEST_F(tls_err, bad_auth)
+{
+	char buf[128];
+	int n;
+
+	if (self->notls)
+		SKIP(return, "no TLS support");
+
+	memrnd(buf, sizeof(buf) / 2);
+	EXPECT_EQ(send(self->fd, buf, sizeof(buf) / 2, 0), sizeof(buf) / 2);
+	n = recv(self->cfd, buf, sizeof(buf), 0);
+	EXPECT_GT(n, sizeof(buf) / 2);
+
+	buf[n - 1]++;
+
+	EXPECT_EQ(send(self->fd2, buf, n, 0), n);
+	EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
+	EXPECT_EQ(errno, EBADMSG);
+	EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
+	EXPECT_EQ(errno, EBADMSG);
+}
+
+TEST_F(tls_err, bad_in_large_read)
+{
+	char txt[3][64];
+	char cip[3][128];
+	char buf[3 * 128];
+	int i, n;
+
+	if (self->notls)
+		SKIP(return, "no TLS support");
+
+	/* Put 3 records in the sockets */
+	for (i = 0; i < 3; i++) {
+		memrnd(txt[i], sizeof(txt[i]));
+		EXPECT_EQ(send(self->fd, txt[i], sizeof(txt[i]), 0),
+			  sizeof(txt[i]));
+		n = recv(self->cfd, cip[i], sizeof(cip[i]), 0);
+		EXPECT_GT(n, sizeof(txt[i]));
+		/* Break the third message */
+		if (i == 2)
+			cip[2][n - 1]++;
+		EXPECT_EQ(send(self->fd2, cip[i], n, 0), n);
+	}
+
+	/* We should be able to receive the first two messages */
+	EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), sizeof(txt[0]) * 2);
+	EXPECT_EQ(memcmp(buf, txt[0], sizeof(txt[0])), 0);
+	EXPECT_EQ(memcmp(buf + sizeof(txt[0]), txt[1], sizeof(txt[1])), 0);
+	/* Third mesasge is bad */
+	EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
+	EXPECT_EQ(errno, EBADMSG);
+	EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
+	EXPECT_EQ(errno, EBADMSG);
+}
+
+TEST_F(tls_err, bad_cmsg)
+{
+	char *test_str = "test_read";
+	int send_len = 10;
+	char cip[128];
+	char buf[128];
+	char txt[64];
+	int n;
+
+	if (self->notls)
+		SKIP(return, "no TLS support");
+
+	/* Queue up one data record */
+	memrnd(txt, sizeof(txt));
+	EXPECT_EQ(send(self->fd, txt, sizeof(txt), 0), sizeof(txt));
+	n = recv(self->cfd, cip, sizeof(cip), 0);
+	EXPECT_GT(n, sizeof(txt));
+	EXPECT_EQ(send(self->fd2, cip, n, 0), n);
+
+	EXPECT_EQ(tls_send_cmsg(self->fd, 100, test_str, send_len, 0), 10);
+	n = recv(self->cfd, cip, sizeof(cip), 0);
+	cip[n - 1]++; /* Break it */
+	EXPECT_GT(n, send_len);
+	EXPECT_EQ(send(self->fd2, cip, n, 0), n);
+
+	EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), sizeof(txt));
+	EXPECT_EQ(memcmp(buf, txt, sizeof(txt)), 0);
+	EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
+	EXPECT_EQ(errno, EBADMSG);
+	EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
+	EXPECT_EQ(errno, EBADMSG);
+}
+
+TEST_F(tls_err, timeo)
+{
+	struct timeval tv = { .tv_usec = 10000, };
+	char buf[128];
+	int ret;
+
+	if (self->notls)
+		SKIP(return, "no TLS support");
+
+	ret = setsockopt(self->cfd2, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
+	ASSERT_EQ(ret, 0);
+
+	ret = fork();
+	ASSERT_GE(ret, 0);
+
+	if (ret) {
+		usleep(1000); /* Give child a head start */
+
+		EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
+		EXPECT_EQ(errno, EAGAIN);
+
+		EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
+		EXPECT_EQ(errno, EAGAIN);
+
+		wait(&ret);
+	} else {
+		EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
+		EXPECT_EQ(errno, EAGAIN);
+		exit(0);
+	}
+}
+
+TEST_F(tls_err, poll_partial_rec)
+{
+	struct pollfd pfd = { };
+	ssize_t rec_len;
+	char rec[256];
+	char buf[128];
+
+	if (self->notls)
+		SKIP(return, "no TLS support");
+
+	pfd.fd = self->cfd2;
+	pfd.events = POLLIN;
+	EXPECT_EQ(poll(&pfd, 1, 1), 0);
+
+	memrnd(buf, sizeof(buf));
+	EXPECT_EQ(send(self->fd, buf, sizeof(buf), 0), sizeof(buf));
+	rec_len = recv(self->cfd, rec, sizeof(rec), 0);
+	EXPECT_GT(rec_len, sizeof(buf));
+
+	/* Write 100B, not the full record ... */
+	EXPECT_EQ(send(self->fd2, rec, 100, 0), 100);
+	/* ... no full record should mean no POLLIN */
+	pfd.fd = self->cfd2;
+	pfd.events = POLLIN;
+	EXPECT_EQ(poll(&pfd, 1, 1), 0);
+	/* Now write the rest, and it should all pop out of the other end. */
+	EXPECT_EQ(send(self->fd2, rec + 100, rec_len - 100, 0), rec_len - 100);
+	pfd.fd = self->cfd2;
+	pfd.events = POLLIN;
+	EXPECT_EQ(poll(&pfd, 1, 1), 1);
+	EXPECT_EQ(recv(self->cfd2, rec, sizeof(rec), 0), sizeof(buf));
+	EXPECT_EQ(memcmp(buf, rec, sizeof(buf)), 0);
+}
+
+TEST_F(tls_err, epoll_partial_rec)
+{
+	struct epoll_event ev, events[10];
+	ssize_t rec_len;
+	char rec[256];
+	char buf[128];
+	int epollfd;
+
+	if (self->notls)
+		SKIP(return, "no TLS support");
+
+	epollfd = epoll_create1(0);
+	ASSERT_GE(epollfd, 0);
+
+	memset(&ev, 0, sizeof(ev));
+	ev.events = EPOLLIN;
+	ev.data.fd = self->cfd2;
+	ASSERT_GE(epoll_ctl(epollfd, EPOLL_CTL_ADD, self->cfd2, &ev), 0);
+
+	EXPECT_EQ(epoll_wait(epollfd, events, 10, 0), 0);
+
+	memrnd(buf, sizeof(buf));
+	EXPECT_EQ(send(self->fd, buf, sizeof(buf), 0), sizeof(buf));
+	rec_len = recv(self->cfd, rec, sizeof(rec), 0);
+	EXPECT_GT(rec_len, sizeof(buf));
+
+	/* Write 100B, not the full record ... */
+	EXPECT_EQ(send(self->fd2, rec, 100, 0), 100);
+	/* ... no full record should mean no POLLIN */
+	EXPECT_EQ(epoll_wait(epollfd, events, 10, 0), 0);
+	/* Now write the rest, and it should all pop out of the other end. */
+	EXPECT_EQ(send(self->fd2, rec + 100, rec_len - 100, 0), rec_len - 100);
+	EXPECT_EQ(epoll_wait(epollfd, events, 10, 0), 1);
+	EXPECT_EQ(recv(self->cfd2, rec, sizeof(rec), 0), sizeof(buf));
+	EXPECT_EQ(memcmp(buf, rec, sizeof(buf)), 0);
+
+	close(epollfd);
+}
+
+TEST_F(tls_err, poll_partial_rec_async)
+{
+	struct pollfd pfd = { };
+	ssize_t rec_len;
+	char rec[256];
+	char buf[128];
+	char token;
+	int p[2];
+	int ret;
+
+	if (self->notls)
+		SKIP(return, "no TLS support");
+
+	ASSERT_GE(pipe(p), 0);
+
+	memrnd(buf, sizeof(buf));
+	EXPECT_EQ(send(self->fd, buf, sizeof(buf), 0), sizeof(buf));
+	rec_len = recv(self->cfd, rec, sizeof(rec), 0);
+	EXPECT_GT(rec_len, sizeof(buf));
+
+	ret = fork();
+	ASSERT_GE(ret, 0);
+
+	if (ret) {
+		int status, pid2;
+
+		close(p[1]);
+		usleep(1000); /* Give child a head start */
+
+		EXPECT_EQ(send(self->fd2, rec, 100, 0), 100);
+
+		EXPECT_EQ(read(p[0], &token, 1), 1); /* Barrier #1 */
+
+		EXPECT_EQ(send(self->fd2, rec + 100, rec_len - 100, 0),
+			  rec_len - 100);
+
+		pid2 = wait(&status);
+		EXPECT_EQ(pid2, ret);
+		EXPECT_EQ(status, 0);
+	} else {
+		close(p[0]);
+
+		/* Child should sleep in poll(), never get a wake */
+		pfd.fd = self->cfd2;
+		pfd.events = POLLIN;
+		EXPECT_EQ(poll(&pfd, 1, 20), 0);
+
+		EXPECT_EQ(write(p[1], &token, 1), 1); /* Barrier #1 */
+
+		pfd.fd = self->cfd2;
+		pfd.events = POLLIN;
+		EXPECT_EQ(poll(&pfd, 1, 20), 1);
+
+		exit(!__test_passed(_metadata));
+	}
+}
+
 TEST(non_established) {
 	struct tls12_crypto_info_aes_gcm_256 tls12;
 	struct sockaddr_in addr;
@@ -1271,64 +1985,191 @@ TEST(non_established) {
 
 TEST(keysizes) {
 	struct tls12_crypto_info_aes_gcm_256 tls12;
-	struct sockaddr_in addr;
-	int sfd, ret, fd, cfd;
-	socklen_t len;
+	int ret, fd, cfd;
 	bool notls;
 
-	notls = false;
-	len = sizeof(addr);
-
 	memset(&tls12, 0, sizeof(tls12));
 	tls12.info.version = TLS_1_2_VERSION;
 	tls12.info.cipher_type = TLS_CIPHER_AES_GCM_256;
 
-	addr.sin_family = AF_INET;
-	addr.sin_addr.s_addr = htonl(INADDR_ANY);
-	addr.sin_port = 0;
+	ulp_sock_pair(_metadata, &fd, &cfd, &notls);
 
-	fd = socket(AF_INET, SOCK_STREAM, 0);
-	sfd = socket(AF_INET, SOCK_STREAM, 0);
+	if (!notls) {
+		ret = setsockopt(fd, SOL_TLS, TLS_TX, &tls12,
+				 sizeof(tls12));
+		EXPECT_EQ(ret, 0);
+
+		ret = setsockopt(cfd, SOL_TLS, TLS_RX, &tls12,
+				 sizeof(tls12));
+		EXPECT_EQ(ret, 0);
+	}
+
+	close(fd);
+	close(cfd);
+}
+
+TEST(no_pad) {
+	struct tls12_crypto_info_aes_gcm_256 tls12;
+	int ret, fd, cfd, val;
+	socklen_t len;
+	bool notls;
+
+	memset(&tls12, 0, sizeof(tls12));
+	tls12.info.version = TLS_1_3_VERSION;
+	tls12.info.cipher_type = TLS_CIPHER_AES_GCM_256;
+
+	ulp_sock_pair(_metadata, &fd, &cfd, &notls);
+
+	if (notls)
+		exit(KSFT_SKIP);
+
+	ret = setsockopt(fd, SOL_TLS, TLS_TX, &tls12, sizeof(tls12));
+	EXPECT_EQ(ret, 0);
+
+	ret = setsockopt(cfd, SOL_TLS, TLS_RX, &tls12, sizeof(tls12));
+	EXPECT_EQ(ret, 0);
+
+	val = 1;
+	ret = setsockopt(cfd, SOL_TLS, TLS_RX_EXPECT_NO_PAD,
+			 (void *)&val, sizeof(val));
+	EXPECT_EQ(ret, 0);
+
+	len = sizeof(val);
+	val = 2;
+	ret = getsockopt(cfd, SOL_TLS, TLS_RX_EXPECT_NO_PAD,
+			 (void *)&val, &len);
+	EXPECT_EQ(ret, 0);
+	EXPECT_EQ(val, 1);
+	EXPECT_EQ(len, 4);
+
+	val = 0;
+	ret = setsockopt(cfd, SOL_TLS, TLS_RX_EXPECT_NO_PAD,
+			 (void *)&val, sizeof(val));
+	EXPECT_EQ(ret, 0);
+
+	len = sizeof(val);
+	val = 2;
+	ret = getsockopt(cfd, SOL_TLS, TLS_RX_EXPECT_NO_PAD,
+			 (void *)&val, &len);
+	EXPECT_EQ(ret, 0);
+	EXPECT_EQ(val, 0);
+	EXPECT_EQ(len, 4);
+
+	close(fd);
+	close(cfd);
+}
+
+TEST(tls_v6ops) {
+	struct tls_crypto_info_keys tls12;
+	struct sockaddr_in6 addr, addr2;
+	int sfd, ret, fd;
+	socklen_t len, len2;
+
+	tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12);
+
+	addr.sin6_family = AF_INET6;
+	addr.sin6_addr = in6addr_any;
+	addr.sin6_port = 0;
+
+	fd = socket(AF_INET6, SOCK_STREAM, 0);
+	sfd = socket(AF_INET6, SOCK_STREAM, 0);
 
 	ret = bind(sfd, &addr, sizeof(addr));
 	ASSERT_EQ(ret, 0);
 	ret = listen(sfd, 10);
 	ASSERT_EQ(ret, 0);
 
+	len = sizeof(addr);
 	ret = getsockname(sfd, &addr, &len);
 	ASSERT_EQ(ret, 0);
 
 	ret = connect(fd, &addr, sizeof(addr));
 	ASSERT_EQ(ret, 0);
 
+	len = sizeof(addr);
+	ret = getsockname(fd, &addr, &len);
+	ASSERT_EQ(ret, 0);
+
 	ret = setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
-	if (ret != 0) {
-		notls = true;
-		printf("Failure setting TCP_ULP, testing without tls\n");
+	if (ret) {
+		ASSERT_EQ(errno, ENOENT);
+		SKIP(return, "no TLS support");
 	}
+	ASSERT_EQ(ret, 0);
 
-	if (!notls) {
-		ret = setsockopt(fd, SOL_TLS, TLS_TX, &tls12,
-				 sizeof(tls12));
-		EXPECT_EQ(ret, 0);
-	}
+	ret = setsockopt(fd, SOL_TLS, TLS_TX, &tls12, tls12.len);
+	ASSERT_EQ(ret, 0);
 
-	cfd = accept(sfd, &addr, &len);
-	ASSERT_GE(cfd, 0);
+	ret = setsockopt(fd, SOL_TLS, TLS_RX, &tls12, tls12.len);
+	ASSERT_EQ(ret, 0);
 
-	if (!notls) {
-		ret = setsockopt(cfd, IPPROTO_TCP, TCP_ULP, "tls",
-				 sizeof("tls"));
-		EXPECT_EQ(ret, 0);
+	len2 = sizeof(addr2);
+	ret = getsockname(fd, &addr2, &len2);
+	ASSERT_EQ(ret, 0);
 
-		ret = setsockopt(cfd, SOL_TLS, TLS_RX, &tls12,
-				 sizeof(tls12));
-		EXPECT_EQ(ret, 0);
-	}
+	EXPECT_EQ(len2, len);
+	EXPECT_EQ(memcmp(&addr, &addr2, len), 0);
 
+	close(fd);
 	close(sfd);
+}
+
+TEST(prequeue) {
+	struct tls_crypto_info_keys tls12;
+	char buf[20000], buf2[20000];
+	struct sockaddr_in addr;
+	int sfd, cfd, ret, fd;
+	socklen_t len;
+
+	len = sizeof(addr);
+	memrnd(buf, sizeof(buf));
+
+	tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_256, &tls12);
+
+	addr.sin_family = AF_INET;
+	addr.sin_addr.s_addr = htonl(INADDR_ANY);
+	addr.sin_port = 0;
+
+	fd = socket(AF_INET, SOCK_STREAM, 0);
+	sfd = socket(AF_INET, SOCK_STREAM, 0);
+
+	ASSERT_EQ(bind(sfd, &addr, sizeof(addr)), 0);
+	ASSERT_EQ(listen(sfd, 10), 0);
+	ASSERT_EQ(getsockname(sfd, &addr, &len), 0);
+	ASSERT_EQ(connect(fd, &addr, sizeof(addr)), 0);
+	ASSERT_GE(cfd = accept(sfd, &addr, &len), 0);
+	close(sfd);
+
+	ret = setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
+	if (ret) {
+		ASSERT_EQ(errno, ENOENT);
+		SKIP(return, "no TLS support");
+	}
+
+	ASSERT_EQ(setsockopt(fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0);
+	EXPECT_EQ(send(fd, buf, sizeof(buf), MSG_DONTWAIT), sizeof(buf));
+
+	ASSERT_EQ(setsockopt(cfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")), 0);
+	ASSERT_EQ(setsockopt(cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0);
+	EXPECT_EQ(recv(cfd, buf2, sizeof(buf2), MSG_WAITALL), sizeof(buf2));
+
+	EXPECT_EQ(memcmp(buf, buf2, sizeof(buf)), 0);
+
 	close(fd);
 	close(cfd);
 }
 
+static void __attribute__((constructor)) fips_check(void) {
+	int res;
+	FILE *f;
+
+	f = fopen("/proc/sys/crypto/fips_enabled", "r");
+	if (f) {
+		res = fscanf(f, "%d", &fips_enabled);
+		if (res != 1)
+			ksft_print_msg("ERROR: Couldn't read /proc/sys/crypto/fips_enabled\n");
+		fclose(f);
+	}
+}
+
 TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/toeplitz.c b/tools/testing/selftests/net/toeplitz.c
new file mode 100644
index 000000000000..9ba03164d73a
--- /dev/null
+++ b/tools/testing/selftests/net/toeplitz.c
@@ -0,0 +1,589 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Toeplitz test
+ *
+ * 1. Read packets and their rx_hash using PF_PACKET/TPACKET_V3
+ * 2. Compute the rx_hash in software based on the packet contents
+ * 3. Compare the two
+ *
+ * Optionally, either '-C $rx_irq_cpu_list' or '-r $rps_bitmap' may be given.
+ *
+ * If '-C $rx_irq_cpu_list' is given, also
+ *
+ * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU
+ * 5. Compute the rxqueue that RSS would select based on this rx_hash
+ * 6. Using the $rx_irq_cpu_list map, identify the arriving cpu based on rxq irq
+ * 7. Compare the cpus from 4 and 6
+ *
+ * Else if '-r $rps_bitmap' is given, also
+ *
+ * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU
+ * 5. Compute the cpu that RPS should select based on rx_hash and $rps_bitmap
+ * 6. Compare the cpus from 4 and 5
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <linux/filter.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <net/if.h>
+#include <netdb.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/sysinfo.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "../kselftest.h"
+
+#define TOEPLITZ_KEY_MIN_LEN	40
+#define TOEPLITZ_KEY_MAX_LEN	60
+
+#define TOEPLITZ_STR_LEN(K)	(((K) * 3) - 1)	/* hex encoded: AA:BB:CC:...:ZZ */
+#define TOEPLITZ_STR_MIN_LEN	TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MIN_LEN)
+#define TOEPLITZ_STR_MAX_LEN	TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MAX_LEN)
+
+#define FOUR_TUPLE_MAX_LEN	((sizeof(struct in6_addr) * 2) + (sizeof(uint16_t) * 2))
+
+#define RSS_MAX_CPUS (1 << 16)	/* real constraint is PACKET_FANOUT_MAX */
+
+#define RPS_MAX_CPUS 16UL	/* must be a power of 2 */
+
+/* configuration options (cmdline arguments) */
+static uint16_t cfg_dport =	8000;
+static int cfg_family =		AF_INET6;
+static char *cfg_ifname =	"eth0";
+static int cfg_num_queues;
+static int cfg_num_rps_cpus;
+static bool cfg_sink;
+static int cfg_type =		SOCK_STREAM;
+static int cfg_timeout_msec =	1000;
+static bool cfg_verbose;
+
+/* global vars */
+static int num_cpus;
+static int ring_block_nr;
+static int ring_block_sz;
+
+/* stats */
+static int frames_received;
+static int frames_nohash;
+static int frames_error;
+
+#define log_verbose(args...)	do { if (cfg_verbose) fprintf(stderr, args); } while (0)
+
+/* tpacket ring */
+struct ring_state {
+	int fd;
+	char *mmap;
+	int idx;
+	int cpu;
+};
+
+static unsigned int rx_irq_cpus[RSS_MAX_CPUS];	/* map from rxq to cpu */
+static int rps_silo_to_cpu[RPS_MAX_CPUS];
+static unsigned char toeplitz_key[TOEPLITZ_KEY_MAX_LEN];
+static struct ring_state rings[RSS_MAX_CPUS];
+
+static inline uint32_t toeplitz(const unsigned char *four_tuple,
+				const unsigned char *key)
+{
+	int i, bit, ret = 0;
+	uint32_t key32;
+
+	key32 = ntohl(*((uint32_t *)key));
+	key += 4;
+
+	for (i = 0; i < FOUR_TUPLE_MAX_LEN; i++) {
+		for (bit = 7; bit >= 0; bit--) {
+			if (four_tuple[i] & (1 << bit))
+				ret ^= key32;
+
+			key32 <<= 1;
+			key32 |= !!(key[0] & (1 << bit));
+		}
+		key++;
+	}
+
+	return ret;
+}
+
+/* Compare computed cpu with arrival cpu from packet_fanout_cpu */
+static void verify_rss(uint32_t rx_hash, int cpu)
+{
+	int queue = rx_hash % cfg_num_queues;
+
+	log_verbose(" rxq %d (cpu %d)", queue, rx_irq_cpus[queue]);
+	if (rx_irq_cpus[queue] != cpu) {
+		log_verbose(". error: rss cpu mismatch (%d)", cpu);
+		frames_error++;
+	}
+}
+
+static void verify_rps(uint64_t rx_hash, int cpu)
+{
+	int silo = (rx_hash * cfg_num_rps_cpus) >> 32;
+
+	log_verbose(" silo %d (cpu %d)", silo, rps_silo_to_cpu[silo]);
+	if (rps_silo_to_cpu[silo] != cpu) {
+		log_verbose(". error: rps cpu mismatch (%d)", cpu);
+		frames_error++;
+	}
+}
+
+static void log_rxhash(int cpu, uint32_t rx_hash,
+		       const char *addrs, int addr_len)
+{
+	char saddr[INET6_ADDRSTRLEN], daddr[INET6_ADDRSTRLEN];
+	uint16_t *ports;
+
+	if (!inet_ntop(cfg_family, addrs, saddr, sizeof(saddr)) ||
+	    !inet_ntop(cfg_family, addrs + addr_len, daddr, sizeof(daddr)))
+		error(1, 0, "address parse error");
+
+	ports = (void *)addrs + (addr_len * 2);
+	log_verbose("cpu %d: rx_hash 0x%08x [saddr %s daddr %s sport %02hu dport %02hu]",
+		    cpu, rx_hash, saddr, daddr,
+		    ntohs(ports[0]), ntohs(ports[1]));
+}
+
+/* Compare computed rxhash with rxhash received from tpacket_v3 */
+static void verify_rxhash(const char *pkt, uint32_t rx_hash, int cpu)
+{
+	unsigned char four_tuple[FOUR_TUPLE_MAX_LEN] = {0};
+	uint32_t rx_hash_sw;
+	const char *addrs;
+	int addr_len;
+
+	if (cfg_family == AF_INET) {
+		addr_len = sizeof(struct in_addr);
+		addrs = pkt + offsetof(struct iphdr, saddr);
+	} else {
+		addr_len = sizeof(struct in6_addr);
+		addrs = pkt + offsetof(struct ip6_hdr, ip6_src);
+	}
+
+	memcpy(four_tuple, addrs, (addr_len * 2) + (sizeof(uint16_t) * 2));
+	rx_hash_sw = toeplitz(four_tuple, toeplitz_key);
+
+	if (cfg_verbose)
+		log_rxhash(cpu, rx_hash, addrs, addr_len);
+
+	if (rx_hash != rx_hash_sw) {
+		log_verbose(" != expected 0x%x\n", rx_hash_sw);
+		frames_error++;
+		return;
+	}
+
+	log_verbose(" OK");
+	if (cfg_num_queues)
+		verify_rss(rx_hash, cpu);
+	else if (cfg_num_rps_cpus)
+		verify_rps(rx_hash, cpu);
+	log_verbose("\n");
+}
+
+static char *recv_frame(const struct ring_state *ring, char *frame)
+{
+	struct tpacket3_hdr *hdr = (void *)frame;
+
+	if (hdr->hv1.tp_rxhash)
+		verify_rxhash(frame + hdr->tp_net, hdr->hv1.tp_rxhash,
+			      ring->cpu);
+	else
+		frames_nohash++;
+
+	return frame + hdr->tp_next_offset;
+}
+
+/* A single TPACKET_V3 block can hold multiple frames */
+static bool recv_block(struct ring_state *ring)
+{
+	struct tpacket_block_desc *block;
+	char *frame;
+	int i;
+
+	block = (void *)(ring->mmap + ring->idx * ring_block_sz);
+	if (!(block->hdr.bh1.block_status & TP_STATUS_USER))
+		return false;
+
+	frame = (char *)block;
+	frame += block->hdr.bh1.offset_to_first_pkt;
+
+	for (i = 0; i < block->hdr.bh1.num_pkts; i++) {
+		frame = recv_frame(ring, frame);
+		frames_received++;
+	}
+
+	block->hdr.bh1.block_status = TP_STATUS_KERNEL;
+	ring->idx = (ring->idx + 1) % ring_block_nr;
+
+	return true;
+}
+
+/* simple test: sleep once unconditionally and then process all rings */
+static void process_rings(void)
+{
+	int i;
+
+	usleep(1000 * cfg_timeout_msec);
+
+	for (i = 0; i < num_cpus; i++)
+		do {} while (recv_block(&rings[i]));
+
+	fprintf(stderr, "count: pass=%u nohash=%u fail=%u\n",
+		frames_received - frames_nohash - frames_error,
+		frames_nohash, frames_error);
+}
+
+static char *setup_ring(int fd)
+{
+	struct tpacket_req3 req3 = {0};
+	void *ring;
+
+	req3.tp_retire_blk_tov = cfg_timeout_msec / 8;
+	req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
+
+	req3.tp_frame_size = 2048;
+	req3.tp_frame_nr = 1 << 10;
+	req3.tp_block_nr = 16;
+
+	req3.tp_block_size = req3.tp_frame_size * req3.tp_frame_nr;
+	req3.tp_block_size /= req3.tp_block_nr;
+
+	if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req3, sizeof(req3)))
+		error(1, errno, "setsockopt PACKET_RX_RING");
+
+	ring_block_sz = req3.tp_block_size;
+	ring_block_nr = req3.tp_block_nr;
+
+	ring = mmap(0, req3.tp_block_size * req3.tp_block_nr,
+		    PROT_READ | PROT_WRITE,
+		    MAP_SHARED | MAP_LOCKED | MAP_POPULATE, fd, 0);
+	if (ring == MAP_FAILED)
+		error(1, 0, "mmap failed");
+
+	return ring;
+}
+
+static void __set_filter(int fd, int off_proto, uint8_t proto, int off_dport)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_LD  + BPF_B   + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE),
+		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4),
+		BPF_STMT(BPF_LD  + BPF_B   + BPF_ABS, off_proto),
+		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, proto, 0, 2),
+		BPF_STMT(BPF_LD  + BPF_H   + BPF_ABS, off_dport),
+		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_dport, 1, 0),
+		BPF_STMT(BPF_RET + BPF_K, 0),
+		BPF_STMT(BPF_RET + BPF_K, 0xFFFF),
+	};
+	struct sock_fprog prog = {};
+
+	prog.filter = filter;
+	prog.len = ARRAY_SIZE(filter);
+	if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)))
+		error(1, errno, "setsockopt filter");
+}
+
+/* filter on transport protocol and destination port */
+static void set_filter(int fd)
+{
+	const int off_dport = offsetof(struct tcphdr, dest);	/* same for udp */
+	uint8_t proto;
+
+	proto = cfg_type == SOCK_STREAM ? IPPROTO_TCP : IPPROTO_UDP;
+	if (cfg_family == AF_INET)
+		__set_filter(fd, offsetof(struct iphdr, protocol), proto,
+			     sizeof(struct iphdr) + off_dport);
+	else
+		__set_filter(fd, offsetof(struct ip6_hdr, ip6_nxt), proto,
+			     sizeof(struct ip6_hdr) + off_dport);
+}
+
+/* drop everything: used temporarily during setup */
+static void set_filter_null(int fd)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_RET + BPF_K, 0),
+	};
+	struct sock_fprog prog = {};
+
+	prog.filter = filter;
+	prog.len = ARRAY_SIZE(filter);
+	if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)))
+		error(1, errno, "setsockopt filter");
+}
+
+static int create_ring(char **ring)
+{
+	struct fanout_args args = {
+		.id = 1,
+		.type_flags = PACKET_FANOUT_CPU,
+		.max_num_members = RSS_MAX_CPUS
+	};
+	struct sockaddr_ll ll = { 0 };
+	int fd, val;
+
+	fd = socket(PF_PACKET, SOCK_DGRAM, 0);
+	if (fd == -1)
+		error(1, errno, "socket creation failed");
+
+	val = TPACKET_V3;
+	if (setsockopt(fd, SOL_PACKET, PACKET_VERSION, &val, sizeof(val)))
+		error(1, errno, "setsockopt PACKET_VERSION");
+	*ring = setup_ring(fd);
+
+	/* block packets until all rings are added to the fanout group:
+	 * else packets can arrive during setup and get misclassified
+	 */
+	set_filter_null(fd);
+
+	ll.sll_family = AF_PACKET;
+	ll.sll_ifindex = if_nametoindex(cfg_ifname);
+	ll.sll_protocol = cfg_family == AF_INET ? htons(ETH_P_IP) :
+						  htons(ETH_P_IPV6);
+	if (bind(fd, (void *)&ll, sizeof(ll)))
+		error(1, errno, "bind");
+
+	/* must come after bind: verifies all programs in group match */
+	if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &args, sizeof(args))) {
+		/* on failure, retry using old API if that is sufficient:
+		 * it has a hard limit of 256 sockets, so only try if
+		 * (a) only testing rxhash, not RSS or (b) <= 256 cpus.
+		 * in this API, the third argument is left implicit.
+		 */
+		if (cfg_num_queues || num_cpus > 256 ||
+		    setsockopt(fd, SOL_PACKET, PACKET_FANOUT,
+			       &args, sizeof(uint32_t)))
+			error(1, errno, "setsockopt PACKET_FANOUT cpu");
+	}
+
+	return fd;
+}
+
+/* setup inet(6) socket to blackhole the test traffic, if arg '-s' */
+static int setup_sink(void)
+{
+	int fd, val;
+
+	fd = socket(cfg_family, cfg_type, 0);
+	if (fd == -1)
+		error(1, errno, "socket %d.%d", cfg_family, cfg_type);
+
+	val = 1 << 20;
+	if (setsockopt(fd, SOL_SOCKET, SO_RCVBUFFORCE, &val, sizeof(val)))
+		error(1, errno, "setsockopt rcvbuf");
+
+	return fd;
+}
+
+static void setup_rings(void)
+{
+	int i;
+
+	for (i = 0; i < num_cpus; i++) {
+		rings[i].cpu = i;
+		rings[i].fd = create_ring(&rings[i].mmap);
+	}
+
+	/* accept packets once all rings in the fanout group are up */
+	for (i = 0; i < num_cpus; i++)
+		set_filter(rings[i].fd);
+}
+
+static void cleanup_rings(void)
+{
+	int i;
+
+	for (i = 0; i < num_cpus; i++) {
+		if (munmap(rings[i].mmap, ring_block_nr * ring_block_sz))
+			error(1, errno, "munmap");
+		if (close(rings[i].fd))
+			error(1, errno, "close");
+	}
+}
+
+static void parse_cpulist(const char *arg)
+{
+	do {
+		rx_irq_cpus[cfg_num_queues++] = strtol(arg, NULL, 10);
+
+		arg = strchr(arg, ',');
+		if (!arg)
+			break;
+		arg++;			// skip ','
+	} while (1);
+}
+
+static void show_cpulist(void)
+{
+	int i;
+
+	for (i = 0; i < cfg_num_queues; i++)
+		fprintf(stderr, "rxq %d: cpu %d\n", i, rx_irq_cpus[i]);
+}
+
+static void show_silos(void)
+{
+	int i;
+
+	for (i = 0; i < cfg_num_rps_cpus; i++)
+		fprintf(stderr, "silo %d: cpu %d\n", i, rps_silo_to_cpu[i]);
+}
+
+static void parse_toeplitz_key(const char *str, int slen, unsigned char *key)
+{
+	int i, ret, off;
+
+	if (slen < TOEPLITZ_STR_MIN_LEN ||
+	    slen > TOEPLITZ_STR_MAX_LEN + 1)
+		error(1, 0, "invalid toeplitz key");
+
+	for (i = 0, off = 0; off < slen; i++, off += 3) {
+		ret = sscanf(str + off, "%hhx", &key[i]);
+		if (ret != 1)
+			error(1, 0, "key parse error at %d off %d len %d",
+			      i, off, slen);
+	}
+}
+
+static void parse_rps_bitmap(const char *arg)
+{
+	unsigned long bitmap;
+	int i;
+
+	bitmap = strtoul(arg, NULL, 0);
+
+	if (bitmap & ~(RPS_MAX_CPUS - 1))
+		error(1, 0, "rps bitmap 0x%lx out of bounds 0..%lu",
+		      bitmap, RPS_MAX_CPUS - 1);
+
+	for (i = 0; i < RPS_MAX_CPUS; i++)
+		if (bitmap & 1UL << i)
+			rps_silo_to_cpu[cfg_num_rps_cpus++] = i;
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	static struct option long_options[] = {
+	    {"dport",	required_argument, 0, 'd'},
+	    {"cpus",	required_argument, 0, 'C'},
+	    {"key",	required_argument, 0, 'k'},
+	    {"iface",	required_argument, 0, 'i'},
+	    {"ipv4",	no_argument, 0, '4'},
+	    {"ipv6",	no_argument, 0, '6'},
+	    {"sink",	no_argument, 0, 's'},
+	    {"tcp",	no_argument, 0, 't'},
+	    {"timeout",	required_argument, 0, 'T'},
+	    {"udp",	no_argument, 0, 'u'},
+	    {"verbose",	no_argument, 0, 'v'},
+	    {"rps",	required_argument, 0, 'r'},
+	    {0, 0, 0, 0}
+	};
+	bool have_toeplitz = false;
+	int index, c;
+
+	while ((c = getopt_long(argc, argv, "46C:d:i:k:r:stT:uv", long_options, &index)) != -1) {
+		switch (c) {
+		case '4':
+			cfg_family = AF_INET;
+			break;
+		case '6':
+			cfg_family = AF_INET6;
+			break;
+		case 'C':
+			parse_cpulist(optarg);
+			break;
+		case 'd':
+			cfg_dport = strtol(optarg, NULL, 0);
+			break;
+		case 'i':
+			cfg_ifname = optarg;
+			break;
+		case 'k':
+			parse_toeplitz_key(optarg, strlen(optarg),
+					   toeplitz_key);
+			have_toeplitz = true;
+			break;
+		case 'r':
+			parse_rps_bitmap(optarg);
+			break;
+		case 's':
+			cfg_sink = true;
+			break;
+		case 't':
+			cfg_type = SOCK_STREAM;
+			break;
+		case 'T':
+			cfg_timeout_msec = strtol(optarg, NULL, 0);
+			break;
+		case 'u':
+			cfg_type = SOCK_DGRAM;
+			break;
+		case 'v':
+			cfg_verbose = true;
+			break;
+
+		default:
+			error(1, 0, "unknown option %c", optopt);
+			break;
+		}
+	}
+
+	if (!have_toeplitz)
+		error(1, 0, "Must supply rss key ('-k')");
+
+	num_cpus = get_nprocs();
+	if (num_cpus > RSS_MAX_CPUS)
+		error(1, 0, "increase RSS_MAX_CPUS");
+
+	if (cfg_num_queues && cfg_num_rps_cpus)
+		error(1, 0,
+		      "Can't supply both RSS cpus ('-C') and RPS map ('-r')");
+	if (cfg_verbose) {
+		show_cpulist();
+		show_silos();
+	}
+}
+
+int main(int argc, char **argv)
+{
+	const int min_tests = 10;
+	int fd_sink = -1;
+
+	parse_opts(argc, argv);
+
+	if (cfg_sink)
+		fd_sink = setup_sink();
+
+	setup_rings();
+	process_rings();
+	cleanup_rings();
+
+	if (cfg_sink && close(fd_sink))
+		error(1, errno, "close sink");
+
+	if (frames_received - frames_nohash < min_tests)
+		error(1, 0, "too few frames for verification");
+
+	return frames_error;
+}
diff --git a/tools/testing/selftests/net/toeplitz.sh b/tools/testing/selftests/net/toeplitz.sh
new file mode 100755
index 000000000000..8ff172f7bb1b
--- /dev/null
+++ b/tools/testing/selftests/net/toeplitz.sh
@@ -0,0 +1,199 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# extended toeplitz test: test rxhash plus, optionally, either (1) rss mapping
+# from rxhash to rx queue ('-rss') or (2) rps mapping from rxhash to cpu
+# ('-rps <rps_map>')
+#
+# irq-pattern-prefix can be derived from /sys/kernel/irq/*/action,
+# which is a driver-specific encoding.
+#
+# invoke as ./toeplitz.sh (-i <iface>) -u|-t -4|-6 \
+# [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)]
+
+source setup_loopback.sh
+readonly SERVER_IP4="192.168.1.200/24"
+readonly SERVER_IP6="fda8::1/64"
+readonly SERVER_MAC="aa:00:00:00:00:02"
+
+readonly CLIENT_IP4="192.168.1.100/24"
+readonly CLIENT_IP6="fda8::2/64"
+readonly CLIENT_MAC="aa:00:00:00:00:01"
+
+PORT=8000
+KEY="$(</proc/sys/net/core/netdev_rss_key)"
+TEST_RSS=false
+RPS_MAP=""
+PROTO_FLAG=""
+IP_FLAG=""
+DEV="eth0"
+
+# Return the number of rxqs among which RSS is configured to spread packets.
+# This is determined by reading the RSS indirection table using ethtool.
+get_rss_cfg_num_rxqs() {
+	echo $(ethtool -x "${DEV}" |
+		grep -E [[:space:]]+[0-9]+:[[:space:]]+ |
+		cut -d: -f2- |
+		awk '{$1=$1};1' |
+		tr ' ' '\n' |
+		sort -u |
+		wc -l)
+}
+
+# Return a list of the receive irq handler cpus.
+# The list is ordered by the irqs, so first rxq-0 cpu, then rxq-1 cpu, etc.
+# Reads /sys/kernel/irq/ in order, so algorithm depends on
+# irq_{rxq-0} < irq_{rxq-1}, etc.
+get_rx_irq_cpus() {
+	CPUS=""
+	# sort so that irq 2 is read before irq 10
+	SORTED_IRQS=$(for i in /sys/kernel/irq/*; do echo $i; done | sort -V)
+	# Consider only as many queues as RSS actually uses. We assume that
+	# if RSS_CFG_NUM_RXQS=N, then RSS uses rxqs 0-(N-1).
+	RSS_CFG_NUM_RXQS=$(get_rss_cfg_num_rxqs)
+	RXQ_COUNT=0
+
+	for i in ${SORTED_IRQS}
+	do
+		[[ "${RXQ_COUNT}" -lt "${RSS_CFG_NUM_RXQS}" ]] || break
+		# lookup relevant IRQs by action name
+		[[ -e "$i/actions" ]] || continue
+		cat "$i/actions" | grep -q "${IRQ_PATTERN}" || continue
+		irqname=$(<"$i/actions")
+
+		# does the IRQ get called
+		irqcount=$(cat "$i/per_cpu_count" | tr -d '0,')
+		[[ -n "${irqcount}" ]] || continue
+
+		# lookup CPU
+		irq=$(basename "$i")
+		cpu=$(cat "/proc/irq/$irq/smp_affinity_list")
+
+		if [[ -z "${CPUS}" ]]; then
+			CPUS="${cpu}"
+		else
+			CPUS="${CPUS},${cpu}"
+		fi
+		RXQ_COUNT=$((RXQ_COUNT+1))
+	done
+
+	echo "${CPUS}"
+}
+
+get_disable_rfs_cmd() {
+	echo "echo 0 > /proc/sys/net/core/rps_sock_flow_entries;"
+}
+
+get_set_rps_bitmaps_cmd() {
+	CMD=""
+	for i in /sys/class/net/${DEV}/queues/rx-*/rps_cpus
+	do
+		CMD="${CMD} echo $1 > ${i};"
+	done
+
+	echo "${CMD}"
+}
+
+get_disable_rps_cmd() {
+	echo "$(get_set_rps_bitmaps_cmd 0)"
+}
+
+die() {
+	echo "$1"
+	exit 1
+}
+
+check_nic_rxhash_enabled() {
+	local -r pattern="receive-hashing:\ on"
+
+	ethtool -k "${DEV}" | grep -q "${pattern}" || die "rxhash must be enabled"
+}
+
+parse_opts() {
+	local prog=$0
+	shift 1
+
+	while [[ "$1" =~ "-" ]]; do
+		if [[ "$1" = "-irq_prefix" ]]; then
+			shift
+			IRQ_PATTERN="^$1-[0-9]*$"
+		elif [[ "$1" = "-u" || "$1" = "-t" ]]; then
+			PROTO_FLAG="$1"
+		elif [[ "$1" = "-4" ]]; then
+			IP_FLAG="$1"
+			SERVER_IP="${SERVER_IP4}"
+			CLIENT_IP="${CLIENT_IP4}"
+		elif [[ "$1" = "-6" ]]; then
+			IP_FLAG="$1"
+			SERVER_IP="${SERVER_IP6}"
+			CLIENT_IP="${CLIENT_IP6}"
+		elif [[ "$1" = "-rss" ]]; then
+			TEST_RSS=true
+		elif [[ "$1" = "-rps" ]]; then
+			shift
+			RPS_MAP="$1"
+		elif [[ "$1" = "-i" ]]; then
+			shift
+			DEV="$1"
+		else
+			die "Usage: ${prog} (-i <iface>) -u|-t -4|-6 \
+			     [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)]"
+		fi
+		shift
+	done
+}
+
+setup() {
+	setup_loopback_environment "${DEV}"
+
+	# Set up server_ns namespace and client_ns namespace
+	setup_macvlan_ns "${DEV}" $server_ns server \
+	"${SERVER_MAC}" "${SERVER_IP}"
+	setup_macvlan_ns "${DEV}" $client_ns client \
+	"${CLIENT_MAC}" "${CLIENT_IP}"
+}
+
+cleanup() {
+	cleanup_macvlan_ns $server_ns server $client_ns client
+	cleanup_loopback "${DEV}"
+}
+
+parse_opts $0 $@
+
+setup
+trap cleanup EXIT
+
+check_nic_rxhash_enabled
+
+# Actual test starts here
+if [[ "${TEST_RSS}" = true ]]; then
+	# RPS/RFS must be disabled because they move packets between cpus,
+	# which breaks the PACKET_FANOUT_CPU identification of RSS decisions.
+	eval "$(get_disable_rfs_cmd) $(get_disable_rps_cmd)" \
+	  ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
+	  -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \
+	  -C "$(get_rx_irq_cpus)" -s -v &
+elif [[ ! -z "${RPS_MAP}" ]]; then
+	eval "$(get_disable_rfs_cmd) $(get_set_rps_bitmaps_cmd ${RPS_MAP})" \
+	  ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
+	  -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \
+	  -r "0x${RPS_MAP}" -s -v &
+else
+	ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
+	  -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 -s -v &
+fi
+
+server_pid=$!
+
+ip netns exec $client_ns ./toeplitz_client.sh "${PROTO_FLAG}" \
+  "${IP_FLAG}" "${SERVER_IP%%/*}" "${PORT}" &
+
+client_pid=$!
+
+wait "${server_pid}"
+exit_code=$?
+kill -9 "${client_pid}"
+if [[ "${exit_code}" -eq 0 ]]; then
+	echo "Test Succeeded!"
+fi
+exit "${exit_code}"
diff --git a/tools/testing/selftests/net/toeplitz_client.sh b/tools/testing/selftests/net/toeplitz_client.sh
new file mode 100755
index 000000000000..2fef34f4aba1
--- /dev/null
+++ b/tools/testing/selftests/net/toeplitz_client.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# A simple program for generating traffic for the toeplitz test.
+#
+# This program sends packets periodically for, conservatively, 20 seconds. The
+# intent is for the calling program to kill this program once it is no longer
+# needed, rather than waiting for the 20 second expiration.
+
+send_traffic() {
+	expiration=$((SECONDS+20))
+	while [[ "${SECONDS}" -lt "${expiration}" ]]
+	do
+		if [[ "${PROTO}" == "-u" ]]; then
+			echo "msg $i" | nc "${IPVER}" -u -w 0 "${ADDR}" "${PORT}"
+		else
+			echo "msg $i" | nc "${IPVER}" -w 0 "${ADDR}" "${PORT}"
+		fi
+		sleep 0.001
+	done
+}
+
+PROTO=$1
+IPVER=$2
+ADDR=$3
+PORT=$4
+
+send_traffic
diff --git a/tools/testing/selftests/net/traceroute.sh b/tools/testing/selftests/net/traceroute.sh
index de9ca97abc30..282f14760940 100755
--- a/tools/testing/selftests/net/traceroute.sh
+++ b/tools/testing/selftests/net/traceroute.sh
@@ -4,6 +4,7 @@
 # Run traceroute/traceroute6 tests
 #
 
+source lib.sh
 VERBOSE=0
 PAUSE_ON_FAIL=no
 
@@ -69,9 +70,6 @@ create_ns()
 	[ -z "${addr}" ] && addr="-"
 	[ -z "${addr6}" ] && addr6="-"
 
-	ip netns add ${ns}
-
-	ip netns exec ${ns} ip link set lo up
 	if [ "${addr}" != "-" ]; then
 		ip netns exec ${ns} ip addr add dev lo ${addr}
 	fi
@@ -160,12 +158,7 @@ connect_ns()
 
 cleanup_traceroute6()
 {
-	local ns
-
-	for ns in host-1 host-2 router-1 router-2
-	do
-		ip netns del ${ns} 2>/dev/null
-	done
+	cleanup_ns $h1 $h2 $r1 $r2
 }
 
 setup_traceroute6()
@@ -176,33 +169,34 @@ setup_traceroute6()
 	cleanup_traceroute6
 
 	set -e
-	create_ns host-1
-	create_ns host-2
-	create_ns router-1
-	create_ns router-2
+	setup_ns h1 h2 r1 r2
+	create_ns $h1
+	create_ns $h2
+	create_ns $r1
+	create_ns $r2
 
 	# Setup N3
-	connect_ns router-2 eth3 - 2000:103::2/64 host-2 eth3 - 2000:103::4/64
-	ip netns exec host-2 ip route add default via 2000:103::2
+	connect_ns $r2 eth3 - 2000:103::2/64 $h2 eth3 - 2000:103::4/64
+	ip netns exec $h2 ip route add default via 2000:103::2
 
 	# Setup N2
-	connect_ns router-1 eth2 - 2000:102::1/64 router-2 eth2 - 2000:102::2/64
-	ip netns exec router-1 ip route add default via 2000:102::2
+	connect_ns $r1 eth2 - 2000:102::1/64 $r2 eth2 - 2000:102::2/64
+	ip netns exec $r1 ip route add default via 2000:102::2
 
 	# Setup N1. host-1 and router-2 connect to a bridge in router-1.
-	ip netns exec router-1 ip link add name ${brdev} type bridge
-	ip netns exec router-1 ip link set ${brdev} up
-	ip netns exec router-1 ip addr add 2000:101::1/64 dev ${brdev}
+	ip netns exec $r1 ip link add name ${brdev} type bridge
+	ip netns exec $r1 ip link set ${brdev} up
+	ip netns exec $r1 ip addr add 2000:101::1/64 dev ${brdev}
 
-	connect_ns host-1 eth0 - 2000:101::3/64 router-1 eth0 - -
-	ip netns exec router-1 ip link set dev eth0 master ${brdev}
-	ip netns exec host-1 ip route add default via 2000:101::1
+	connect_ns $h1 eth0 - 2000:101::3/64 $r1 eth0 - -
+	ip netns exec $r1 ip link set dev eth0 master ${brdev}
+	ip netns exec $h1 ip route add default via 2000:101::1
 
-	connect_ns router-2 eth1 - 2000:101::2/64 router-1 eth1 - -
-	ip netns exec router-1 ip link set dev eth1 master ${brdev}
+	connect_ns $r2 eth1 - 2000:101::2/64 $r1 eth1 - -
+	ip netns exec $r1 ip link set dev eth1 master ${brdev}
 
 	# Prime the network
-	ip netns exec host-1 ping6 -c5 2000:103::4 >/dev/null 2>&1
+	ip netns exec $h1 ping6 -c5 2000:103::4 >/dev/null 2>&1
 
 	set +e
 }
@@ -217,7 +211,7 @@ run_traceroute6()
 	setup_traceroute6
 
 	# traceroute6 host-2 from host-1 (expects 2000:102::2)
-	run_cmd host-1 "traceroute6 2000:103::4 | grep -q 2000:102::2"
+	run_cmd $h1 "traceroute6 2000:103::4 | grep -q 2000:102::2"
 	log_test $? 0 "IPV6 traceroute"
 
 	cleanup_traceroute6
@@ -240,12 +234,7 @@ run_traceroute6()
 
 cleanup_traceroute()
 {
-	local ns
-
-	for ns in host-1 host-2 router
-	do
-		ip netns del ${ns} 2>/dev/null
-	done
+	cleanup_ns $h1 $h2 $router
 }
 
 setup_traceroute()
@@ -254,24 +243,25 @@ setup_traceroute()
 	cleanup_traceroute
 
 	set -e
-	create_ns host-1
-	create_ns host-2
-	create_ns router
+	setup_ns h1 h2 router
+	create_ns $h1
+	create_ns $h2
+	create_ns $router
 
-	connect_ns host-1 eth0 1.0.1.3/24 - \
-	           router eth1 1.0.3.1/24 -
-	ip netns exec host-1 ip route add default via 1.0.1.1
+	connect_ns $h1 eth0 1.0.1.3/24 - \
+	           $router eth1 1.0.3.1/24 -
+	ip netns exec $h1 ip route add default via 1.0.1.1
 
-	ip netns exec router ip addr add 1.0.1.1/24 dev eth1
-	ip netns exec router sysctl -qw \
+	ip netns exec $router ip addr add 1.0.1.1/24 dev eth1
+	ip netns exec $router sysctl -qw \
 				net.ipv4.icmp_errors_use_inbound_ifaddr=1
 
-	connect_ns host-2 eth0 1.0.2.4/24 - \
-	           router eth2 1.0.2.1/24 -
-	ip netns exec host-2 ip route add default via 1.0.2.1
+	connect_ns $h2 eth0 1.0.2.4/24 - \
+	           $router eth2 1.0.2.1/24 -
+	ip netns exec $h2 ip route add default via 1.0.2.1
 
 	# Prime the network
-	ip netns exec host-1 ping -c5 1.0.2.4 >/dev/null 2>&1
+	ip netns exec $h1 ping -c5 1.0.2.4 >/dev/null 2>&1
 
 	set +e
 }
@@ -286,7 +276,7 @@ run_traceroute()
 	setup_traceroute
 
 	# traceroute host-2 from host-1 (expects 1.0.1.1). Takes a while.
-	run_cmd host-1 "traceroute 1.0.2.4 | grep -q 1.0.1.1"
+	run_cmd $h1 "traceroute 1.0.2.4 | grep -q 1.0.1.1"
 	log_test $? 0 "IPV4 traceroute"
 
 	cleanup_traceroute
diff --git a/tools/testing/selftests/net/tun.c b/tools/testing/selftests/net/tun.c
new file mode 100644
index 000000000000..fa83918b62d1
--- /dev/null
+++ b/tools/testing/selftests/net/tun.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <linux/if.h>
+#include <linux/if_tun.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+
+#include "../kselftest_harness.h"
+
+static int tun_attach(int fd, char *dev)
+{
+	struct ifreq ifr;
+
+	memset(&ifr, 0, sizeof(ifr));
+	strcpy(ifr.ifr_name, dev);
+	ifr.ifr_flags = IFF_ATTACH_QUEUE;
+
+	return ioctl(fd, TUNSETQUEUE, (void *) &ifr);
+}
+
+static int tun_detach(int fd, char *dev)
+{
+	struct ifreq ifr;
+
+	memset(&ifr, 0, sizeof(ifr));
+	strcpy(ifr.ifr_name, dev);
+	ifr.ifr_flags = IFF_DETACH_QUEUE;
+
+	return ioctl(fd, TUNSETQUEUE, (void *) &ifr);
+}
+
+static int tun_alloc(char *dev)
+{
+	struct ifreq ifr;
+	int fd, err;
+
+	fd = open("/dev/net/tun", O_RDWR);
+	if (fd < 0) {
+		fprintf(stderr, "can't open tun: %s\n", strerror(errno));
+		return fd;
+	}
+
+	memset(&ifr, 0, sizeof(ifr));
+	strcpy(ifr.ifr_name, dev);
+	ifr.ifr_flags = IFF_TAP | IFF_NAPI | IFF_MULTI_QUEUE;
+
+	err = ioctl(fd, TUNSETIFF, (void *) &ifr);
+	if (err < 0) {
+		fprintf(stderr, "can't TUNSETIFF: %s\n", strerror(errno));
+		close(fd);
+		return err;
+	}
+	strcpy(dev, ifr.ifr_name);
+	return fd;
+}
+
+static int tun_delete(char *dev)
+{
+	struct {
+		struct nlmsghdr  nh;
+		struct ifinfomsg ifm;
+		unsigned char    data[64];
+	} req;
+	struct rtattr *rta;
+	int ret, rtnl;
+
+	rtnl = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE);
+	if (rtnl < 0) {
+		fprintf(stderr, "can't open rtnl: %s\n", strerror(errno));
+		return 1;
+	}
+
+	memset(&req, 0, sizeof(req));
+	req.nh.nlmsg_len = NLMSG_ALIGN(NLMSG_LENGTH(sizeof(req.ifm)));
+	req.nh.nlmsg_flags = NLM_F_REQUEST;
+	req.nh.nlmsg_type = RTM_DELLINK;
+
+	req.ifm.ifi_family = AF_UNSPEC;
+
+	rta = (struct rtattr *)(((char *)&req) + NLMSG_ALIGN(req.nh.nlmsg_len));
+	rta->rta_type = IFLA_IFNAME;
+	rta->rta_len = RTA_LENGTH(IFNAMSIZ);
+	req.nh.nlmsg_len += rta->rta_len;
+	memcpy(RTA_DATA(rta), dev, IFNAMSIZ);
+
+	ret = send(rtnl, &req, req.nh.nlmsg_len, 0);
+	if (ret < 0)
+		fprintf(stderr, "can't send: %s\n", strerror(errno));
+	ret = (unsigned int)ret != req.nh.nlmsg_len;
+
+	close(rtnl);
+	return ret;
+}
+
+FIXTURE(tun)
+{
+	char ifname[IFNAMSIZ];
+	int fd, fd2;
+};
+
+FIXTURE_SETUP(tun)
+{
+	memset(self->ifname, 0, sizeof(self->ifname));
+
+	self->fd = tun_alloc(self->ifname);
+	ASSERT_GE(self->fd, 0);
+
+	self->fd2 = tun_alloc(self->ifname);
+	ASSERT_GE(self->fd2, 0);
+}
+
+FIXTURE_TEARDOWN(tun)
+{
+	if (self->fd >= 0)
+		close(self->fd);
+	if (self->fd2 >= 0)
+		close(self->fd2);
+}
+
+TEST_F(tun, delete_detach_close) {
+	EXPECT_EQ(tun_delete(self->ifname), 0);
+	EXPECT_EQ(tun_detach(self->fd, self->ifname), -1);
+	EXPECT_EQ(errno, 22);
+}
+
+TEST_F(tun, detach_delete_close) {
+	EXPECT_EQ(tun_detach(self->fd, self->ifname), 0);
+	EXPECT_EQ(tun_delete(self->ifname), 0);
+}
+
+TEST_F(tun, detach_close_delete) {
+	EXPECT_EQ(tun_detach(self->fd, self->ifname), 0);
+	close(self->fd);
+	self->fd = -1;
+	EXPECT_EQ(tun_delete(self->ifname), 0);
+}
+
+TEST_F(tun, reattach_delete_close) {
+	EXPECT_EQ(tun_detach(self->fd, self->ifname), 0);
+	EXPECT_EQ(tun_attach(self->fd, self->ifname), 0);
+	EXPECT_EQ(tun_delete(self->ifname), 0);
+}
+
+TEST_F(tun, reattach_close_delete) {
+	EXPECT_EQ(tun_detach(self->fd, self->ifname), 0);
+	EXPECT_EQ(tun_attach(self->fd, self->ifname), 0);
+	close(self->fd);
+	self->fd = -1;
+	EXPECT_EQ(tun_delete(self->ifname), 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/txtimestamp.c b/tools/testing/selftests/net/txtimestamp.c
index 490a8cca708a..ec60a16c9307 100644
--- a/tools/testing/selftests/net/txtimestamp.c
+++ b/tools/testing/selftests/net/txtimestamp.c
@@ -26,6 +26,7 @@
 #include <inttypes.h>
 #include <linux/errqueue.h>
 #include <linux/if_ether.h>
+#include <linux/if_packet.h>
 #include <linux/ipv6.h>
 #include <linux/net_tstamp.h>
 #include <netdb.h>
@@ -34,7 +35,6 @@
 #include <netinet/ip.h>
 #include <netinet/udp.h>
 #include <netinet/tcp.h>
-#include <netpacket/packet.h>
 #include <poll.h>
 #include <stdarg.h>
 #include <stdbool.h>
@@ -161,18 +161,19 @@ static void validate_timestamp(struct timespec *cur, int min_delay)
 	max_delay = min_delay + cfg_delay_tolerance_usec;
 
 	if (cur64 < start64 + min_delay || cur64 > start64 + max_delay) {
-		fprintf(stderr, "ERROR: %lu us expected between %d and %d\n",
+		fprintf(stderr, "ERROR: %" PRId64 " us expected between %d and %d\n",
 				cur64 - start64, min_delay, max_delay);
-		test_failed = true;
+		if (!getenv("KSFT_MACHINE_SLOW"))
+			test_failed = true;
 	}
 }
 
 static void __print_ts_delta_formatted(int64_t ts_delta)
 {
 	if (cfg_print_nsec)
-		fprintf(stderr, "%lu ns", ts_delta);
+		fprintf(stderr, "%" PRId64 " ns", ts_delta);
 	else
-		fprintf(stderr, "%lu us", ts_delta / NSEC_PER_USEC);
+		fprintf(stderr, "%" PRId64 " us", ts_delta / NSEC_PER_USEC);
 }
 
 static void __print_timestamp(const char *name, struct timespec *cur,
@@ -495,12 +496,12 @@ static void do_test(int family, unsigned int report_opt)
 	total_len = cfg_payload_len;
 	if (cfg_use_pf_packet || cfg_proto == SOCK_RAW) {
 		total_len += sizeof(struct udphdr);
-		if (cfg_use_pf_packet || cfg_ipproto == IPPROTO_RAW)
+		if (cfg_use_pf_packet || cfg_ipproto == IPPROTO_RAW) {
 			if (family == PF_INET)
 				total_len += sizeof(struct iphdr);
 			else
 				total_len += sizeof(struct ipv6hdr);
-
+		}
 		/* special case, only rawv6_sendmsg:
 		 * pass proto in sin6_port if not connected
 		 * also see ANK comment in net/ipv4/raw.c
diff --git a/tools/testing/selftests/net/txtimestamp.sh b/tools/testing/selftests/net/txtimestamp.sh
index 31637769f59f..25baca4b148e 100755
--- a/tools/testing/selftests/net/txtimestamp.sh
+++ b/tools/testing/selftests/net/txtimestamp.sh
@@ -8,13 +8,13 @@ set -e
 
 setup() {
 	# set 1ms delay on lo egress
-	tc qdisc add dev lo root netem delay 1ms
+	tc qdisc add dev lo root netem delay 10ms
 
 	# set 2ms delay on ifb0 egress
 	modprobe ifb
 	ip link add ifb_netem0 type ifb
 	ip link set dev ifb_netem0 up
-	tc qdisc add dev ifb_netem0 root netem delay 2ms
+	tc qdisc add dev ifb_netem0 root netem delay 20ms
 
 	# redirect lo ingress through ifb0 egress
 	tc qdisc add dev lo handle ffff: ingress
@@ -24,9 +24,11 @@ setup() {
 }
 
 run_test_v4v6() {
-	# SND will be delayed 1000us
-	# ACK will be delayed 6000us: 1 + 2 ms round-trip
-	local -r args="$@ -v 1000 -V 6000"
+	# SND will be delayed 10ms
+	# ACK will be delayed 60ms: 10 + 20 ms round-trip
+	# allow +/- tolerance of 8ms
+	# wait for ACK to be queued
+	local -r args="$@ -v 10000 -V 60000 -t 8000 -S 80000"
 
 	./txtimestamp ${args} -4 -L 127.0.0.1
 	./txtimestamp ${args} -6 -L ::1
diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh
index ac2a30be9b32..8802604148dd 100755
--- a/tools/testing/selftests/net/udpgro.sh
+++ b/tools/testing/selftests/net/udpgro.sh
@@ -3,8 +3,20 @@
 #
 # Run a series of udpgro functional tests.
 
+source net_helper.sh
+
 readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
 
+BPF_FILE="xdp_dummy.o"
+
+# set global exit status, but never reset nonzero one.
+check_err()
+{
+	if [ $ret -eq 0 ]; then
+		ret=$1
+	fi
+}
+
 cleanup() {
 	local -r jobs="$(jobs -p)"
 	local -r ns="$(ip netns list|grep $PEER_NS)"
@@ -26,7 +38,7 @@ cfg_veth() {
 	ip -netns "${PEER_NS}" addr add dev veth1 192.168.1.1/24
 	ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad
 	ip -netns "${PEER_NS}" link set dev veth1 up
-	ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp_dummy
+	ip -n "${PEER_NS}" link set veth1 xdp object ${BPF_FILE} section xdp
 }
 
 run_one() {
@@ -41,10 +53,11 @@ run_one() {
 		echo "ok" || \
 		echo "failed" &
 
-	# Hack: let bg programs complete the startup
-	sleep 0.1
+	wait_local_port_listen ${PEER_NS} 8000 udp
 	./udpgso_bench_tx ${tx_args}
+	ret=$?
 	wait $(jobs -p)
+	return $ret
 }
 
 run_test() {
@@ -85,10 +98,12 @@ run_one_nat() {
 		echo "ok" || \
 		echo "failed"&
 
-	sleep 0.1
+	wait_local_port_listen "${PEER_NS}" 8000 udp
 	./udpgso_bench_tx ${tx_args}
+	ret=$?
 	kill -INT $pid
 	wait $(jobs -p)
+	return $ret
 }
 
 run_one_2sock() {
@@ -104,13 +119,13 @@ run_one_2sock() {
 		echo "ok" || \
 		echo "failed" &
 
-	# Hack: let bg programs complete the startup
-	sleep 0.1
+	wait_local_port_listen "${PEER_NS}" 12345 udp
 	./udpgso_bench_tx ${tx_args} -p 12345
-	sleep 0.1
-	# first UDP GSO socket should be closed at this point
+	wait_local_port_listen "${PEER_NS}" 8000 udp
 	./udpgso_bench_tx ${tx_args}
+	ret=$?
 	wait $(jobs -p)
+	return $ret
 }
 
 run_nat_test() {
@@ -131,40 +146,58 @@ run_all() {
 	local -r core_args="-l 4"
 	local -r ipv4_args="${core_args} -4 -D 192.168.1.1"
 	local -r ipv6_args="${core_args} -6 -D 2001:db8::1"
+	ret=0
 
 	echo "ipv4"
 	run_test "no GRO" "${ipv4_args} -M 10 -s 1400" "-4 -n 10 -l 1400"
+	check_err $?
 
 	# explicitly check we are not receiving UDP_SEGMENT cmsg (-S -1)
 	# when GRO does not take place
 	run_test "no GRO chk cmsg" "${ipv4_args} -M 10 -s 1400" "-4 -n 10 -l 1400 -S -1"
+	check_err $?
 
 	# the GSO packets are aggregated because:
 	# * veth schedule napi after each xmit
 	# * segmentation happens in BH context, veth napi poll is delayed after
 	#   the transmission of the last segment
 	run_test "GRO" "${ipv4_args} -M 1 -s 14720 -S 0 " "-4 -n 1 -l 14720"
+	check_err $?
 	run_test "GRO chk cmsg" "${ipv4_args} -M 1 -s 14720 -S 0 " "-4 -n 1 -l 14720 -S 1472"
+	check_err $?
 	run_test "GRO with custom segment size" "${ipv4_args} -M 1 -s 14720 -S 500 " "-4 -n 1 -l 14720"
+	check_err $?
 	run_test "GRO with custom segment size cmsg" "${ipv4_args} -M 1 -s 14720 -S 500 " "-4 -n 1 -l 14720 -S 500"
+	check_err $?
 
 	run_nat_test "bad GRO lookup" "${ipv4_args} -M 1 -s 14720 -S 0" "-n 10 -l 1472"
+	check_err $?
 	run_2sock_test "multiple GRO socks" "${ipv4_args} -M 1 -s 14720 -S 0 " "-4 -n 1 -l 14720 -S 1472"
+	check_err $?
 
 	echo "ipv6"
 	run_test "no GRO" "${ipv6_args} -M 10 -s 1400" "-n 10 -l 1400"
+	check_err $?
 	run_test "no GRO chk cmsg" "${ipv6_args} -M 10 -s 1400" "-n 10 -l 1400 -S -1"
+	check_err $?
 	run_test "GRO" "${ipv6_args} -M 1 -s 14520 -S 0" "-n 1 -l 14520"
+	check_err $?
 	run_test "GRO chk cmsg" "${ipv6_args} -M 1 -s 14520 -S 0" "-n 1 -l 14520 -S 1452"
+	check_err $?
 	run_test "GRO with custom segment size" "${ipv6_args} -M 1 -s 14520 -S 500" "-n 1 -l 14520"
+	check_err $?
 	run_test "GRO with custom segment size cmsg" "${ipv6_args} -M 1 -s 14520 -S 500" "-n 1 -l 14520 -S 500"
+	check_err $?
 
 	run_nat_test "bad GRO lookup" "${ipv6_args} -M 1 -s 14520 -S 0" "-n 10 -l 1452"
+	check_err $?
 	run_2sock_test "multiple GRO socks" "${ipv6_args} -M 1 -s 14520 -S 0 " "-n 1 -l 14520 -S 1452"
+	check_err $?
+	return $ret
 }
 
-if [ ! -f ../bpf/xdp_dummy.o ]; then
-	echo "Missing xdp_dummy helper. Build bpf selftest first"
+if [ ! -f ${BPF_FILE} ]; then
+	echo "Missing ${BPF_FILE}. Run 'make' first"
 	exit -1
 fi
 
@@ -180,3 +213,5 @@ elif [[ $1 == "__subprocess_2sock" ]]; then
 	shift
 	run_one_2sock $@
 fi
+
+exit $?
diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh
index 820bc50f6b68..7080eae5312b 100755
--- a/tools/testing/selftests/net/udpgro_bench.sh
+++ b/tools/testing/selftests/net/udpgro_bench.sh
@@ -3,8 +3,12 @@
 #
 # Run a series of udpgro benchmarks
 
+source net_helper.sh
+
 readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
 
+BPF_FILE="xdp_dummy.o"
+
 cleanup() {
 	local -r jobs="$(jobs -p)"
 	local -r ns="$(ip netns list|grep $PEER_NS)"
@@ -34,12 +38,11 @@ run_one() {
 	ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad
 	ip -netns "${PEER_NS}" link set dev veth1 up
 
-	ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp_dummy
+	ip -n "${PEER_NS}" link set veth1 xdp object ${BPF_FILE} section xdp
 	ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r &
 	ip netns exec "${PEER_NS}" ./udpgso_bench_rx -t ${rx_args} -r &
 
-	# Hack: let bg programs complete the startup
-	sleep 0.1
+	wait_local_port_listen "${PEER_NS}" 8000 udp
 	./udpgso_bench_tx ${tx_args}
 }
 
@@ -80,8 +83,8 @@ run_all() {
 	run_udp "${ipv6_args}"
 }
 
-if [ ! -f ../bpf/xdp_dummy.o ]; then
-	echo "Missing xdp_dummy helper. Build bpf selftest first"
+if [ ! -f ${BPF_FILE} ]; then
+	echo "Missing ${BPF_FILE}. Run 'make' first"
 	exit -1
 fi
 
diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh
new file mode 100755
index 000000000000..e1ff645bd3d1
--- /dev/null
+++ b/tools/testing/selftests/net/udpgro_frglist.sh
@@ -0,0 +1,104 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run a series of udpgro benchmarks
+
+source net_helper.sh
+
+readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
+
+BPF_FILE="xdp_dummy.o"
+
+cleanup() {
+	local -r jobs="$(jobs -p)"
+	local -r ns="$(ip netns list|grep $PEER_NS)"
+
+	[ -n "${jobs}" ] && kill -INT ${jobs} 2>/dev/null
+	[ -n "$ns" ] && ip netns del $ns 2>/dev/null
+}
+trap cleanup EXIT
+
+run_one() {
+	# use 'rx' as separator between sender args and receiver args
+	local -r all="$@"
+	local -r tx_args=${all%rx*}
+	local rx_args=${all#*rx}
+
+
+
+	ip netns add "${PEER_NS}"
+	ip -netns "${PEER_NS}" link set lo up
+	ip link add type veth
+	ip link set dev veth0 up
+	ip addr add dev veth0 192.168.1.2/24
+	ip addr add dev veth0 2001:db8::2/64 nodad
+
+	ip link set dev veth1 netns "${PEER_NS}"
+	ip -netns "${PEER_NS}" addr add dev veth1 192.168.1.1/24
+	ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad
+	ip -netns "${PEER_NS}" link set dev veth1 up
+	ip netns exec "${PEER_NS}" ethtool -K veth1 rx-gro-list on
+
+
+	ip -n "${PEER_NS}" link set veth1 xdp object ${BPF_FILE} section xdp
+	tc -n "${PEER_NS}" qdisc add dev veth1 clsact
+	tc -n "${PEER_NS}" filter add dev veth1 ingress prio 4 protocol ipv6 bpf object-file nat6to4.o section schedcls/ingress6/nat_6  direct-action
+	tc -n "${PEER_NS}" filter add dev veth1 egress prio 4 protocol ip bpf object-file nat6to4.o section schedcls/egress4/snat4 direct-action
+        echo ${rx_args}
+	ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r &
+
+	wait_local_port_listen "${PEER_NS}" 8000 udp
+	./udpgso_bench_tx ${tx_args}
+}
+
+run_in_netns() {
+	local -r args=$@
+  echo ${args}
+	./in_netns.sh $0 __subprocess ${args}
+}
+
+run_udp() {
+	local -r args=$@
+
+	echo "udp gso - over veth touching data"
+	run_in_netns ${args} -u -S 0 rx -4 -v
+
+	echo "udp gso and gro - over veth touching data"
+	run_in_netns ${args} -S 0 rx -4 -G
+}
+
+run_tcp() {
+	local -r args=$@
+
+	echo "tcp - over veth touching data"
+	run_in_netns ${args} -t rx -4 -t
+}
+
+run_all() {
+	local -r core_args="-l 4"
+	local -r ipv4_args="${core_args} -4  -D 192.168.1.1"
+	local -r ipv6_args="${core_args} -6  -D 2001:db8::1"
+
+	echo "ipv6"
+	run_tcp "${ipv6_args}"
+	run_udp "${ipv6_args}"
+}
+
+if [ ! -f ${BPF_FILE} ]; then
+	echo "Missing ${BPF_FILE}. Run 'make' first"
+	exit -1
+fi
+
+if [ ! -f nat6to4.o ]; then
+	echo "Missing nat6to4 helper. Run 'make' first"
+	exit -1
+fi
+
+if [[ $# -eq 0 ]]; then
+	run_all
+elif [[ $1 == "__subprocess" ]]; then
+	shift
+	run_one $@
+else
+	run_in_netns $@
+fi
diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh
new file mode 100755
index 000000000000..380cb15e942e
--- /dev/null
+++ b/tools/testing/selftests/net/udpgro_fwd.sh
@@ -0,0 +1,271 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source net_helper.sh
+
+BPF_FILE="xdp_dummy.o"
+readonly BASE="ns-$(mktemp -u XXXXXX)"
+readonly SRC=2
+readonly DST=1
+readonly DST_NAT=100
+readonly NS_SRC=$BASE$SRC
+readonly NS_DST=$BASE$DST
+
+# "baremetal" network used for raw UDP traffic
+readonly BM_NET_V4=192.168.1.
+readonly BM_NET_V6=2001:db8::
+
+# "overlay" network used for UDP over UDP tunnel traffic
+readonly OL_NET_V4=172.16.1.
+readonly OL_NET_V6=2001:db8:1::
+readonly NPROCS=`nproc`
+
+cleanup() {
+	local ns
+	local -r jobs="$(jobs -p)"
+	[ -n "${jobs}" ] && kill -1 ${jobs} 2>/dev/null
+
+	for ns in $NS_SRC $NS_DST; do
+		ip netns del $ns 2>/dev/null
+	done
+}
+
+trap cleanup EXIT
+
+create_ns() {
+	local net
+	local ns
+
+	for ns in $NS_SRC $NS_DST; do
+		ip netns add $ns
+		ip -n $ns link set dev lo up
+
+		# disable route solicitations to decrease 'noise' traffic
+		ip netns exec $ns sysctl -qw net.ipv6.conf.default.router_solicitations=0
+		ip netns exec $ns sysctl -qw net.ipv6.conf.all.router_solicitations=0
+	done
+
+	ip link add name veth$SRC type veth peer name veth$DST
+
+	for ns in $SRC $DST; do
+		ip link set dev veth$ns netns $BASE$ns
+		ip -n $BASE$ns link set dev veth$ns up
+		ip -n $BASE$ns addr add dev veth$ns $BM_NET_V4$ns/24
+		ip -n $BASE$ns addr add dev veth$ns $BM_NET_V6$ns/64 nodad
+	done
+	ip -n $NS_DST link set veth$DST xdp object ${BPF_FILE} section xdp 2>/dev/null
+}
+
+create_vxlan_endpoint() {
+	local -r netns=$1
+	local -r bm_dev=$2
+	local -r bm_rem_addr=$3
+	local -r vxlan_dev=$4
+	local -r vxlan_id=$5
+	local -r vxlan_port=4789
+
+	ip -n $netns link set dev $bm_dev up
+	ip -n $netns link add dev $vxlan_dev type vxlan id $vxlan_id \
+				dstport $vxlan_port remote $bm_rem_addr
+	ip -n $netns link set dev $vxlan_dev up
+}
+
+create_vxlan_pair() {
+	local ns
+
+	create_ns
+
+	for ns in $SRC $DST; do
+		# note that 3 - $SRC == $DST and 3 - $DST == $SRC
+		create_vxlan_endpoint $BASE$ns veth$ns $BM_NET_V4$((3 - $ns)) vxlan$ns 4
+		ip -n $BASE$ns addr add dev vxlan$ns $OL_NET_V4$ns/24
+	done
+	for ns in $SRC $DST; do
+		create_vxlan_endpoint $BASE$ns veth$ns $BM_NET_V6$((3 - $ns)) vxlan6$ns 6
+		ip -n $BASE$ns addr add dev vxlan6$ns $OL_NET_V6$ns/24 nodad
+	done
+
+	# preload neighbur cache, do avoid some noisy traffic
+	local addr_dst=$(ip -j -n $BASE$DST link show dev vxlan6$DST  |jq -r '.[]["address"]')
+	local addr_src=$(ip -j -n $BASE$SRC link show dev vxlan6$SRC  |jq -r '.[]["address"]')
+	ip -n $BASE$DST neigh add dev vxlan6$DST lladdr $addr_src $OL_NET_V6$SRC
+	ip -n $BASE$SRC neigh add dev vxlan6$SRC lladdr $addr_dst $OL_NET_V6$DST
+}
+
+is_ipv6() {
+	if [[ $1 =~ .*:.* ]]; then
+		return 0
+	fi
+	return 1
+}
+
+run_test() {
+	local -r msg=$1
+	local -r dst=$2
+	local -r pkts=$3
+	local -r vxpkts=$4
+	local bind=$5
+	local rx_args=""
+	local rx_family="-4"
+	local family=-4
+	local filter=IpInReceives
+	local ipt=iptables
+
+	printf "%-40s" "$msg"
+
+	if is_ipv6 $dst; then
+		# rx program does not support '-6' and implies ipv6 usage by default
+		rx_family=""
+		family=-6
+		filter=Ip6InReceives
+		ipt=ip6tables
+	fi
+
+	rx_args="$rx_family"
+	[ -n "$bind" ] && rx_args="$rx_args -b $bind"
+
+	# send a single GSO packet, segmented in 10 UDP frames.
+	# Always expect 10 UDP frames on RX side as rx socket does
+	# not enable GRO
+	ip netns exec $NS_DST $ipt -A INPUT -p udp --dport 4789
+	ip netns exec $NS_DST $ipt -A INPUT -p udp --dport 8000
+	ip netns exec $NS_DST ./udpgso_bench_rx -C 2000 -R 100 -n 10 -l 1300 $rx_args &
+	local spid=$!
+	wait_local_port_listen "$NS_DST" 8000 udp
+	ip netns exec $NS_SRC ./udpgso_bench_tx $family -M 1 -s 13000 -S 1300 -D $dst
+	local retc=$?
+	wait $spid
+	local rets=$?
+	if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
+		echo " fail client exit code $retc, server $rets"
+		ret=1
+		return
+	fi
+
+	local rcv=`ip netns exec $NS_DST $ipt"-save" -c | grep 'dport 8000' | \
+							  sed -e 's/\[//' -e 's/:.*//'`
+	if [ $rcv != $pkts ]; then
+		echo " fail - received $rcv packets, expected $pkts"
+		ret=1
+		return
+	fi
+
+	local vxrcv=`ip netns exec $NS_DST $ipt"-save" -c | grep 'dport 4789' | \
+							    sed -e 's/\[//' -e 's/:.*//'`
+
+	# upper net can generate a little noise, allow some tolerance
+	if [ $vxrcv -lt $vxpkts -o $vxrcv -gt $((vxpkts + 3)) ]; then
+		echo " fail - received $vxrcv vxlan packets, expected $vxpkts"
+		ret=1
+		return
+	fi
+	echo " ok"
+}
+
+run_bench() {
+	local -r msg=$1
+	local -r dst=$2
+	local family=-4
+
+	printf "%-40s" "$msg"
+	if [ $NPROCS -lt 2 ]; then
+		echo " skip - needed 2 CPUs found $NPROCS"
+		return
+	fi
+
+	is_ipv6 $dst && family=-6
+
+	# bind the sender and the receiver to different CPUs to try
+	# get reproducible results
+	ip netns exec $NS_DST bash -c "echo 2 > /sys/class/net/veth$DST/queues/rx-0/rps_cpus"
+	ip netns exec $NS_DST taskset 0x2 ./udpgso_bench_rx -C 2000 -R 100  &
+	local spid=$!
+	wait_local_port_listen "$NS_DST" 8000 udp
+	ip netns exec $NS_SRC taskset 0x1 ./udpgso_bench_tx $family -l 3 -S 1300 -D $dst
+	local retc=$?
+	wait $spid
+	local rets=$?
+	if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
+		echo " fail client exit code $retc, server $rets"
+		ret=1
+		return
+	fi
+}
+
+for family in 4 6; do
+	BM_NET=$BM_NET_V4
+	OL_NET=$OL_NET_V4
+	IPT=iptables
+	SUFFIX=24
+	VXDEV=vxlan
+	PING=ping
+
+	if [ $family = 6 ]; then
+		BM_NET=$BM_NET_V6
+		OL_NET=$OL_NET_V6
+		SUFFIX="64 nodad"
+		VXDEV=vxlan6
+		IPT=ip6tables
+		# Use ping6 on systems where ping doesn't handle IPv6
+		ping -w 1 -c 1 ::1 > /dev/null 2>&1 || PING="ping6"
+	fi
+
+	echo "IPv$family"
+
+	create_ns
+	run_test "No GRO" $BM_NET$DST 10 0
+	cleanup
+
+	create_ns
+	ip netns exec $NS_DST ethtool -K veth$DST generic-receive-offload on
+	ip netns exec $NS_DST ethtool -K veth$DST rx-gro-list on
+	run_test "GRO frag list" $BM_NET$DST 1 0
+	cleanup
+
+	# UDP GRO fwd skips aggregation when find an udp socket with the GRO option
+	# if there is an UDP tunnel in the running system, such lookup happen
+	# take place.
+	# use NAT to circumvent GRO FWD check
+	create_ns
+	ip -n $NS_DST addr add dev veth$DST $BM_NET$DST_NAT/$SUFFIX
+	ip netns exec $NS_DST ethtool -K veth$DST generic-receive-offload on
+	ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on
+	ip netns exec $NS_DST $IPT -t nat -I PREROUTING -d $BM_NET$DST_NAT \
+					-j DNAT --to-destination $BM_NET$DST
+	run_test "GRO fwd" $BM_NET$DST_NAT 1 0 $BM_NET$DST
+	cleanup
+
+	create_ns
+	run_bench "UDP fwd perf" $BM_NET$DST
+	ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on
+	run_bench "UDP GRO fwd perf" $BM_NET$DST
+	cleanup
+
+	create_vxlan_pair
+	ip netns exec $NS_DST ethtool -K veth$DST generic-receive-offload on
+	ip netns exec $NS_DST ethtool -K veth$DST rx-gro-list on
+	run_test "GRO frag list over UDP tunnel" $OL_NET$DST 1 1
+	cleanup
+
+	# use NAT to circumvent GRO FWD check
+	create_vxlan_pair
+	ip -n $NS_DST addr add dev $VXDEV$DST $OL_NET$DST_NAT/$SUFFIX
+	ip netns exec $NS_DST ethtool -K veth$DST generic-receive-offload on
+	ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on
+	ip netns exec $NS_DST $IPT -t nat -I PREROUTING -d $OL_NET$DST_NAT \
+					-j DNAT --to-destination $OL_NET$DST
+
+	# load arp cache before running the test to reduce the amount of
+	# stray traffic on top of the UDP tunnel
+	ip netns exec $NS_SRC $PING -q -c 1 $OL_NET$DST_NAT >/dev/null
+	run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 1 1 $OL_NET$DST
+	cleanup
+
+	create_vxlan_pair
+	run_bench "UDP tunnel fwd perf" $OL_NET$DST
+	ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on
+	run_bench "UDP tunnel GRO fwd perf" $OL_NET$DST
+	cleanup
+done
+
+exit $ret
diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c
index c66da6ffd6d8..1d975bf52af3 100644
--- a/tools/testing/selftests/net/udpgso.c
+++ b/tools/testing/selftests/net/udpgso.c
@@ -56,7 +56,6 @@ static bool		cfg_do_msgmore;
 static bool		cfg_do_setsockopt;
 static int		cfg_specific_test_id = -1;
 
-static const char	cfg_ifname[] = "lo";
 static unsigned short	cfg_port = 9000;
 
 static char buf[ETH_MAX_MTU];
@@ -69,8 +68,13 @@ struct testcase {
 	int r_len_last;		/* recv(): size of last non-mss dgram, if any */
 };
 
-const struct in6_addr addr6 = IN6ADDR_LOOPBACK_INIT;
-const struct in_addr addr4 = { .s_addr = __constant_htonl(INADDR_LOOPBACK + 2) };
+const struct in6_addr addr6 = {
+	{ { 0xfd, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 } }, /* fd00::1 */
+};
+
+const struct in_addr addr4 = {
+	__constant_htonl(0x0a000001), /* 10.0.0.1 */
+};
 
 struct testcase testcases_v4[] = {
 	{
@@ -156,13 +160,13 @@ struct testcase testcases_v4[] = {
 	},
 	{
 		/* send max number of min sized segments */
-		.tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4,
+		.tlen = UDP_MAX_SEGMENTS,
 		.gso_len = 1,
-		.r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4,
+		.r_num_mss = UDP_MAX_SEGMENTS,
 	},
 	{
 		/* send max number + 1 of min sized segments: fail */
-		.tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4 + 1,
+		.tlen = UDP_MAX_SEGMENTS + 1,
 		.gso_len = 1,
 		.tfail = true,
 	},
@@ -259,13 +263,13 @@ struct testcase testcases_v6[] = {
 	},
 	{
 		/* send max number of min sized segments */
-		.tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6,
+		.tlen = UDP_MAX_SEGMENTS,
 		.gso_len = 1,
-		.r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6,
+		.r_num_mss = UDP_MAX_SEGMENTS,
 	},
 	{
 		/* send max number + 1 of min sized segments: fail */
-		.tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6 + 1,
+		.tlen = UDP_MAX_SEGMENTS + 1,
 		.gso_len = 1,
 		.tfail = true,
 	},
@@ -274,48 +278,6 @@ struct testcase testcases_v6[] = {
 	}
 };
 
-static unsigned int get_device_mtu(int fd, const char *ifname)
-{
-	struct ifreq ifr;
-
-	memset(&ifr, 0, sizeof(ifr));
-
-	strcpy(ifr.ifr_name, ifname);
-
-	if (ioctl(fd, SIOCGIFMTU, &ifr))
-		error(1, errno, "ioctl get mtu");
-
-	return ifr.ifr_mtu;
-}
-
-static void __set_device_mtu(int fd, const char *ifname, unsigned int mtu)
-{
-	struct ifreq ifr;
-
-	memset(&ifr, 0, sizeof(ifr));
-
-	ifr.ifr_mtu = mtu;
-	strcpy(ifr.ifr_name, ifname);
-
-	if (ioctl(fd, SIOCSIFMTU, &ifr))
-		error(1, errno, "ioctl set mtu");
-}
-
-static void set_device_mtu(int fd, int mtu)
-{
-	int val;
-
-	val = get_device_mtu(fd, cfg_ifname);
-	fprintf(stderr, "device mtu (orig): %u\n", val);
-
-	__set_device_mtu(fd, cfg_ifname, mtu);
-	val = get_device_mtu(fd, cfg_ifname);
-	if (val != mtu)
-		error(1, 0, "unable to set device mtu to %u\n", val);
-
-	fprintf(stderr, "device mtu (test): %u\n", val);
-}
-
 static void set_pmtu_discover(int fd, bool is_ipv4)
 {
 	int level, name, val;
@@ -354,81 +316,6 @@ static unsigned int get_path_mtu(int fd, bool is_ipv4)
 	return mtu;
 }
 
-/* very wordy version of system("ip route add dev lo mtu 1500 127.0.0.3/32") */
-static void set_route_mtu(int mtu, bool is_ipv4)
-{
-	struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
-	struct nlmsghdr *nh;
-	struct rtattr *rta;
-	struct rtmsg *rt;
-	char data[NLMSG_ALIGN(sizeof(*nh)) +
-		  NLMSG_ALIGN(sizeof(*rt)) +
-		  NLMSG_ALIGN(RTA_LENGTH(sizeof(addr6))) +
-		  NLMSG_ALIGN(RTA_LENGTH(sizeof(int))) +
-		  NLMSG_ALIGN(RTA_LENGTH(0) + RTA_LENGTH(sizeof(int)))];
-	int fd, ret, alen, off = 0;
-
-	alen = is_ipv4 ? sizeof(addr4) : sizeof(addr6);
-
-	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
-	if (fd == -1)
-		error(1, errno, "socket netlink");
-
-	memset(data, 0, sizeof(data));
-
-	nh = (void *)data;
-	nh->nlmsg_type = RTM_NEWROUTE;
-	nh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE;
-	off += NLMSG_ALIGN(sizeof(*nh));
-
-	rt = (void *)(data + off);
-	rt->rtm_family = is_ipv4 ? AF_INET : AF_INET6;
-	rt->rtm_table = RT_TABLE_MAIN;
-	rt->rtm_dst_len = alen << 3;
-	rt->rtm_protocol = RTPROT_BOOT;
-	rt->rtm_scope = RT_SCOPE_UNIVERSE;
-	rt->rtm_type = RTN_UNICAST;
-	off += NLMSG_ALIGN(sizeof(*rt));
-
-	rta = (void *)(data + off);
-	rta->rta_type = RTA_DST;
-	rta->rta_len = RTA_LENGTH(alen);
-	if (is_ipv4)
-		memcpy(RTA_DATA(rta), &addr4, alen);
-	else
-		memcpy(RTA_DATA(rta), &addr6, alen);
-	off += NLMSG_ALIGN(rta->rta_len);
-
-	rta = (void *)(data + off);
-	rta->rta_type = RTA_OIF;
-	rta->rta_len = RTA_LENGTH(sizeof(int));
-	*((int *)(RTA_DATA(rta))) = 1; //if_nametoindex("lo");
-	off += NLMSG_ALIGN(rta->rta_len);
-
-	/* MTU is a subtype in a metrics type */
-	rta = (void *)(data + off);
-	rta->rta_type = RTA_METRICS;
-	rta->rta_len = RTA_LENGTH(0) + RTA_LENGTH(sizeof(int));
-	off += NLMSG_ALIGN(rta->rta_len);
-
-	/* now fill MTU subtype. Note that it fits within above rta_len */
-	rta = (void *)(((char *) rta) + RTA_LENGTH(0));
-	rta->rta_type = RTAX_MTU;
-	rta->rta_len = RTA_LENGTH(sizeof(int));
-	*((int *)(RTA_DATA(rta))) = mtu;
-
-	nh->nlmsg_len = off;
-
-	ret = sendto(fd, data, off, 0, (void *)&nladdr, sizeof(nladdr));
-	if (ret != off)
-		error(1, errno, "send netlink: %uB != %uB\n", ret, off);
-
-	if (close(fd))
-		error(1, errno, "close netlink");
-
-	fprintf(stderr, "route mtu (test): %u\n", mtu);
-}
-
 static bool __send_one(int fd, struct msghdr *msg, int flags)
 {
 	int ret;
@@ -591,15 +478,10 @@ static void run_test(struct sockaddr *addr, socklen_t alen)
 	/* Do not fragment these datagrams: only succeed if GSO works */
 	set_pmtu_discover(fdt, addr->sa_family == AF_INET);
 
-	if (cfg_do_connectionless) {
-		set_device_mtu(fdt, CONST_MTU_TEST);
+	if (cfg_do_connectionless)
 		run_all(fdt, fdr, addr, alen);
-	}
 
 	if (cfg_do_connected) {
-		set_device_mtu(fdt, CONST_MTU_TEST + 100);
-		set_route_mtu(CONST_MTU_TEST, addr->sa_family == AF_INET);
-
 		if (connect(fdt, addr, alen))
 			error(1, errno, "connect");
 
diff --git a/tools/testing/selftests/net/udpgso.sh b/tools/testing/selftests/net/udpgso.sh
index fec24f584fe9..6c63178086b0 100755
--- a/tools/testing/selftests/net/udpgso.sh
+++ b/tools/testing/selftests/net/udpgso.sh
@@ -3,27 +3,56 @@
 #
 # Run a series of udpgso regression tests
 
+set -o errexit
+set -o nounset
+
+setup_loopback() {
+	ip addr add dev lo 10.0.0.1/32
+	ip addr add dev lo fd00::1/128 nodad noprefixroute
+}
+
+test_dev_mtu() {
+	setup_loopback
+	# Reduce loopback MTU
+	ip link set dev lo mtu 1500
+}
+
+test_route_mtu() {
+	setup_loopback
+	# Remove default local routes
+	ip route del local 10.0.0.1/32 table local dev lo
+	ip route del local fd00::1/128 table local dev lo
+	# Install local routes with reduced MTU
+	ip route add local 10.0.0.1/32 table local dev lo mtu 1500
+	ip route add local fd00::1/128 table local dev lo mtu 1500
+}
+
+if [ "$#" -gt 0 ]; then
+	"$1"
+	shift 2 # pop "test_*" arg and "--" delimiter
+	exec "$@"
+fi
+
 echo "ipv4 cmsg"
-./in_netns.sh ./udpgso -4 -C
+./in_netns.sh "$0" test_dev_mtu -- ./udpgso -4 -C
 
 echo "ipv4 setsockopt"
-./in_netns.sh ./udpgso -4 -C -s
+./in_netns.sh "$0" test_dev_mtu -- ./udpgso -4 -C -s
 
 echo "ipv6 cmsg"
-./in_netns.sh ./udpgso -6 -C
+./in_netns.sh "$0" test_dev_mtu -- ./udpgso -6 -C
 
 echo "ipv6 setsockopt"
-./in_netns.sh ./udpgso -6 -C -s
+./in_netns.sh "$0" test_dev_mtu -- ./udpgso -6 -C -s
 
 echo "ipv4 connected"
-./in_netns.sh ./udpgso -4 -c
+./in_netns.sh "$0" test_route_mtu -- ./udpgso -4 -c
 
-# blocked on 2nd loopback address
-# echo "ipv6 connected"
-# ./in_netns.sh ./udpgso -6 -c
+echo "ipv6 connected"
+./in_netns.sh "$0" test_route_mtu -- ./udpgso -6 -c
 
 echo "ipv4 msg_more"
-./in_netns.sh ./udpgso -4 -C -m
+./in_netns.sh "$0" test_dev_mtu -- ./udpgso -4 -C -m
 
 echo "ipv6 msg_more"
-./in_netns.sh ./udpgso -6 -C -m
+./in_netns.sh "$0" test_dev_mtu -- ./udpgso -6 -C -m
diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh
index 80b5d352702e..640bc43452fa 100755
--- a/tools/testing/selftests/net/udpgso_bench.sh
+++ b/tools/testing/selftests/net/udpgso_bench.sh
@@ -7,6 +7,7 @@ readonly GREEN='\033[0;92m'
 readonly YELLOW='\033[0;33m'
 readonly RED='\033[0;31m'
 readonly NC='\033[0m' # No Color
+readonly TESTPORT=8000
 
 readonly KSFT_PASS=0
 readonly KSFT_FAIL=1
@@ -56,11 +57,26 @@ trap wake_children EXIT
 
 run_one() {
 	local -r args=$@
+	local nr_socks=0
+	local i=0
+	local -r timeout=10
+
+	./udpgso_bench_rx -p "$TESTPORT" &
+	./udpgso_bench_rx -p "$TESTPORT" -t &
+
+	# Wait for the above test program to get ready to receive connections.
+	while [ "$i" -lt "$timeout" ]; do
+		nr_socks="$(ss -lnHi | grep -c "\*:${TESTPORT}")"
+		[ "$nr_socks" -eq 2 ] && break
+		i=$((i + 1))
+		sleep 1
+	done
+	if [ "$nr_socks" -ne 2 ]; then
+		echo "timed out while waiting for udpgso_bench_rx"
+		exit 1
+	fi
 
-	./udpgso_bench_rx &
-	./udpgso_bench_rx -t &
-
-	./udpgso_bench_tx ${args}
+	./udpgso_bench_tx -p "$TESTPORT" ${args}
 }
 
 run_in_netns() {
@@ -120,7 +136,7 @@ run_all() {
 	run_udp "${ipv4_args}"
 
 	echo "ipv6"
-	run_tcp "${ipv4_args}"
+	run_tcp "${ipv6_args}"
 	run_udp "${ipv6_args}"
 }
 
diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c
index db3d4a8b5a4c..1cbadd267c96 100644
--- a/tools/testing/selftests/net/udpgso_bench_rx.c
+++ b/tools/testing/selftests/net/udpgso_bench_rx.c
@@ -113,6 +113,9 @@ static void do_poll(int fd, int timeout_ms)
 				interrupted = true;
 				break;
 			}
+
+			/* no events and more time to wait, do poll again */
+			continue;
 		}
 		if (pfd.revents != POLLIN)
 			error(1, errno, "poll: 0x%x expected 0x%x\n",
@@ -211,11 +214,10 @@ static void do_verify_udp(const char *data, int len)
 
 static int recv_msg(int fd, char *buf, int len, int *gso_size)
 {
-	char control[CMSG_SPACE(sizeof(uint16_t))] = {0};
+	char control[CMSG_SPACE(sizeof(int))] = {0};
 	struct msghdr msg = {0};
 	struct iovec iov = {0};
 	struct cmsghdr *cmsg;
-	uint16_t *gsosizeptr;
 	int ret;
 
 	iov.iov_base = buf;
@@ -234,8 +236,7 @@ static int recv_msg(int fd, char *buf, int len, int *gso_size)
 		     cmsg = CMSG_NXTHDR(&msg, cmsg)) {
 			if (cmsg->cmsg_level == SOL_UDP
 			    && cmsg->cmsg_type == UDP_GRO) {
-				gsosizeptr = (uint16_t *) CMSG_DATA(cmsg);
-				*gso_size = *gsosizeptr;
+				*gso_size = *(int *)CMSG_DATA(cmsg);
 				break;
 			}
 		}
@@ -247,7 +248,7 @@ static int recv_msg(int fd, char *buf, int len, int *gso_size)
 static void do_flush_udp(int fd)
 {
 	static char rbuf[ETH_MAX_MTU];
-	int ret, len, gso_size, budget = 256;
+	int ret, len, gso_size = 0, budget = 256;
 
 	len = cfg_read_all ? sizeof(rbuf) : 0;
 	while (budget--) {
@@ -290,19 +291,17 @@ static void usage(const char *filepath)
 
 static void parse_opts(int argc, char **argv)
 {
+	const char *bind_addr = NULL;
 	int c;
 
-	/* bind to any by default */
-	setup_sockaddr(PF_INET6, "::", &cfg_bind_addr);
 	while ((c = getopt(argc, argv, "4b:C:Gl:n:p:rR:S:tv")) != -1) {
 		switch (c) {
 		case '4':
 			cfg_family = PF_INET;
 			cfg_alen = sizeof(struct sockaddr_in);
-			setup_sockaddr(PF_INET, "0.0.0.0", &cfg_bind_addr);
 			break;
 		case 'b':
-			setup_sockaddr(cfg_family, optarg, &cfg_bind_addr);
+			bind_addr = optarg;
 			break;
 		case 'C':
 			cfg_connect_timeout_ms = strtoul(optarg, NULL, 0);
@@ -335,9 +334,16 @@ static void parse_opts(int argc, char **argv)
 			cfg_verify = true;
 			cfg_read_all = true;
 			break;
+		default:
+			exit(1);
 		}
 	}
 
+	if (!bind_addr)
+		bind_addr = cfg_family == PF_INET6 ? "::" : "0.0.0.0";
+
+	setup_sockaddr(cfg_family, bind_addr, &cfg_bind_addr);
+
 	if (optind != argc)
 		usage(argv[0]);
 
@@ -369,7 +375,7 @@ static void do_recv(void)
 			do_flush_udp(fd);
 
 		tnow = gettimeofday_ms();
-		if (tnow > treport) {
+		if (!cfg_expected_pkt_nr && tnow > treport) {
 			if (packets)
 				fprintf(stderr,
 					"%s rx: %6lu MB/s %8lu calls/s\n",
diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c
index 17512a43885e..477392715a9a 100644
--- a/tools/testing/selftests/net/udpgso_bench_tx.c
+++ b/tools/testing/selftests/net/udpgso_bench_tx.c
@@ -62,6 +62,7 @@ static int	cfg_payload_len	= (1472 * 42);
 static int	cfg_port	= 8000;
 static int	cfg_runtime_ms	= -1;
 static bool	cfg_poll;
+static int	cfg_poll_loop_timeout_ms = 2000;
 static bool	cfg_segment;
 static bool	cfg_sendmmsg;
 static bool	cfg_tcp;
@@ -235,16 +236,17 @@ static void flush_errqueue_recv(int fd)
 	}
 }
 
-static void flush_errqueue(int fd, const bool do_poll)
+static void flush_errqueue(int fd, const bool do_poll,
+			   unsigned long poll_timeout, const bool poll_err)
 {
 	if (do_poll) {
 		struct pollfd fds = {0};
 		int ret;
 
 		fds.fd = fd;
-		ret = poll(&fds, 1, 500);
+		ret = poll(&fds, 1, poll_timeout);
 		if (ret == 0) {
-			if (cfg_verbose)
+			if ((cfg_verbose) && (poll_err))
 				fprintf(stderr, "poll timeout\n");
 		} else if (ret < 0) {
 			error(1, errno, "poll");
@@ -254,6 +256,20 @@ static void flush_errqueue(int fd, const bool do_poll)
 	flush_errqueue_recv(fd);
 }
 
+static void flush_errqueue_retry(int fd, unsigned long num_sends)
+{
+	unsigned long tnow, tstop;
+	bool first_try = true;
+
+	tnow = gettimeofday_ms();
+	tstop = tnow + cfg_poll_loop_timeout_ms;
+	do {
+		flush_errqueue(fd, true, tstop - tnow, first_try);
+		first_try = false;
+		tnow = gettimeofday_ms();
+	} while ((stat_zcopies != num_sends) && (tnow < tstop));
+}
+
 static int send_tcp(int fd, char *data)
 {
 	int ret, done = 0, count = 0;
@@ -413,16 +429,18 @@ static int send_udp_segment(int fd, char *data)
 
 static void usage(const char *filepath)
 {
-	error(1, 0, "Usage: %s [-46acmHPtTuvz] [-C cpu] [-D dst ip] [-l secs] [-M messagenr] [-p port] [-s sendsize] [-S gsosize]",
+	error(1, 0, "Usage: %s [-46acmHPtTuvz] [-C cpu] [-D dst ip] [-l secs] "
+		    "[-L secs] [-M messagenr] [-p port] [-s sendsize] [-S gsosize]",
 		    filepath);
 }
 
 static void parse_opts(int argc, char **argv)
 {
+	const char *bind_addr = NULL;
 	int max_len, hdrlen;
 	int c;
 
-	while ((c = getopt(argc, argv, "46acC:D:Hl:mM:p:s:PS:tTuvz")) != -1) {
+	while ((c = getopt(argc, argv, "46acC:D:Hl:L:mM:p:s:PS:tTuvz")) != -1) {
 		switch (c) {
 		case '4':
 			if (cfg_family != PF_UNSPEC)
@@ -446,11 +464,14 @@ static void parse_opts(int argc, char **argv)
 			cfg_cpu = strtol(optarg, NULL, 0);
 			break;
 		case 'D':
-			setup_sockaddr(cfg_family, optarg, &cfg_dst_addr);
+			bind_addr = optarg;
 			break;
 		case 'l':
 			cfg_runtime_ms = strtoul(optarg, NULL, 10) * 1000;
 			break;
+		case 'L':
+			cfg_poll_loop_timeout_ms = strtoul(optarg, NULL, 10) * 1000;
+			break;
 		case 'm':
 			cfg_sendmmsg = true;
 			break;
@@ -489,9 +510,16 @@ static void parse_opts(int argc, char **argv)
 		case 'z':
 			cfg_zerocopy = true;
 			break;
+		default:
+			exit(1);
 		}
 	}
 
+	if (!bind_addr)
+		bind_addr = cfg_family == PF_INET6 ? "::" : "0.0.0.0";
+
+	setup_sockaddr(cfg_family, bind_addr, &cfg_dst_addr);
+
 	if (optind != argc)
 		usage(argv[0]);
 
@@ -671,7 +699,7 @@ int main(int argc, char **argv)
 			num_sends += send_udp(fd, buf[i]);
 		num_msgs++;
 		if ((cfg_zerocopy && ((num_msgs & 0xF) == 0)) || cfg_tx_tstamp)
-			flush_errqueue(fd, cfg_poll);
+			flush_errqueue(fd, cfg_poll, 500, true);
 
 		if (cfg_msg_nr && num_msgs >= cfg_msg_nr)
 			break;
@@ -690,7 +718,7 @@ int main(int argc, char **argv)
 	} while (!interrupted && (cfg_runtime_ms == -1 || tnow < tstop));
 
 	if (cfg_zerocopy || cfg_tx_tstamp)
-		flush_errqueue(fd, true);
+		flush_errqueue_retry(fd, num_sends);
 
 	if (close(fd))
 		error(1, errno, "close");
diff --git a/tools/testing/selftests/net/unicast_extensions.sh b/tools/testing/selftests/net/unicast_extensions.sh
new file mode 100755
index 000000000000..f52aa5f7da52
--- /dev/null
+++ b/tools/testing/selftests/net/unicast_extensions.sh
@@ -0,0 +1,225 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# By Seth Schoen (c) 2021, for the IPv4 Unicast Extensions Project
+# Thanks to David Ahern for help and advice on nettest modifications.
+#
+# Self-tests for IPv4 address extensions: the kernel's ability to accept
+# certain traditionally unused or unallocated IPv4 addresses. For each kind
+# of address, we test for interface assignment, ping, TCP, and forwarding.
+# Must be run as root (to manipulate network namespaces and virtual
+# interfaces).
+#
+# Things we test for here:
+#
+# * Currently the kernel accepts addresses in 0/8 and 240/4 as valid.
+#
+# * Notwithstanding that, 0.0.0.0 and 255.255.255.255 cannot be assigned.
+#
+# * Currently the kernel DOES NOT accept unicast use of the lowest
+#   address in an IPv4 subnet (e.g. 192.168.100.0/32 in 192.168.100.0/24).
+#   This is treated as a second broadcast address, for compatibility
+#   with 4.2BSD (!).
+#
+# * Currently the kernel DOES NOT accept unicast use of any of 127/8.
+#
+# * Currently the kernel DOES NOT accept unicast use of any of 224/4.
+#
+# These tests provide an easy way to flip the expected result of any
+# of these behaviors for testing kernel patches that change them.
+
+source lib.sh
+
+# nettest can be run from PATH or from same directory as this selftest
+if ! which nettest >/dev/null; then
+	PATH=$PWD:$PATH
+	if ! which nettest >/dev/null; then
+		echo "'nettest' command not found; skipping tests"
+		exit $ksft_skip
+	fi
+fi
+
+result=0
+
+hide_output(){ exec 3>&1 4>&2 >/dev/null 2>/dev/null; }
+show_output(){ exec >&3 2>&4; }
+
+show_result(){
+	if [ $1 -eq 0 ]; then
+		printf "TEST: %-60s  [ OK ]\n" "${2}"
+	else
+		printf "TEST: %-60s  [FAIL]\n" "${2}"
+		result=1
+	fi
+}
+
+_do_segmenttest(){
+	# Perform a simple set of link tests between a pair of
+	# IP addresses on a shared (virtual) segment, using
+	# ping and nettest.
+	# foo --- bar
+	# Arguments: ip_a ip_b prefix_length test_description
+	#
+	# Caller must set up $foo_ns and $bar_ns namespaces
+	# containing linked veth devices foo and bar,
+	# respectively.
+
+	ip -n $foo_ns address add $1/$3 dev foo || return 1
+	ip -n $foo_ns link set foo up || return 1
+	ip -n $bar_ns address add $2/$3 dev bar || return 1
+	ip -n $bar_ns link set bar up || return 1
+
+	ip netns exec $foo_ns timeout 2 ping -c 1 $2 || return 1
+	ip netns exec $bar_ns timeout 2 ping -c 1 $1 || return 1
+
+	nettest -B -N $bar_ns -O $foo_ns -r $1 || return 1
+	nettest -B -N $foo_ns -O $bar_ns -r $2 || return 1
+
+	return 0
+}
+
+_do_route_test(){
+	# Perform a simple set of gateway tests.
+	#
+	# [foo] <---> [foo1]-[bar1] <---> [bar]   /prefix
+	#  host          gateway          host
+	#
+	# Arguments: foo_ip foo1_ip bar1_ip bar_ip prefix_len test_description
+	# Displays test result and returns success or failure.
+
+	# Caller must set up $foo_ns, $bar_ns, and $router_ns
+	# containing linked veth devices foo-foo1, bar1-bar
+	# (foo in $foo_ns, foo1 and bar1 in $router_ns, and
+	# bar in $bar_ns).
+
+	ip -n $foo_ns address add $1/$5 dev foo || return 1
+	ip -n $foo_ns link set foo up || return 1
+	ip -n $foo_ns route add default via $2 || return 1
+	ip -n $bar_ns address add $4/$5 dev bar || return 1
+	ip -n $bar_ns link set bar up || return 1
+	ip -n $bar_ns route add default via $3 || return 1
+	ip -n $router_ns address add $2/$5 dev foo1 || return 1
+	ip -n $router_ns link set foo1 up || return 1
+	ip -n $router_ns address add $3/$5 dev bar1 || return 1
+	ip -n $router_ns link set bar1 up || return 1
+
+	echo 1 | ip netns exec $router_ns tee /proc/sys/net/ipv4/ip_forward
+
+	ip netns exec $foo_ns timeout 2 ping -c 1 $2 || return 1
+	ip netns exec $foo_ns timeout 2 ping -c 1 $4 || return 1
+	ip netns exec $bar_ns timeout 2 ping -c 1 $3 || return 1
+	ip netns exec $bar_ns timeout 2 ping -c 1 $1 || return 1
+
+	nettest -B -N $bar_ns -O $foo_ns -r $1 || return 1
+	nettest -B -N $foo_ns -O $bar_ns -r $4 || return 1
+
+	return 0
+}
+
+segmenttest(){
+	# Sets up veth link and tries to connect over it.
+	# Arguments: ip_a ip_b prefix_len test_description
+	hide_output
+	setup_ns foo_ns bar_ns
+	ip link add foo netns $foo_ns type veth peer name bar netns $bar_ns
+
+	test_result=0
+	_do_segmenttest "$@" || test_result=1
+
+	ip netns pids $foo_ns | xargs -r kill -9
+	ip netns pids $bar_ns | xargs -r kill -9
+	cleanup_ns $foo_ns $bar_ns
+	show_output
+
+	# inverted tests will expect failure instead of success
+	[ -n "$expect_failure" ] && test_result=`expr 1 - $test_result`
+
+	show_result $test_result "$4"
+}
+
+route_test(){
+	# Sets up a simple gateway and tries to connect through it.
+	# [foo] <---> [foo1]-[bar1] <---> [bar]   /prefix
+	# Arguments: foo_ip foo1_ip bar1_ip bar_ip prefix_len test_description
+	# Returns success or failure.
+
+	hide_output
+	setup_ns foo_ns bar_ns router_ns
+	ip link add foo netns $foo_ns type veth peer name foo1 netns $router_ns
+	ip link add bar netns $bar_ns type veth peer name bar1 netns $router_ns
+
+	test_result=0
+	_do_route_test "$@" || test_result=1
+
+	ip netns pids $foo_ns | xargs -r kill -9
+	ip netns pids $bar_ns | xargs -r kill -9
+	ip netns pids $router_ns | xargs -r kill -9
+	cleanup_ns $foo_ns $bar_ns $router_ns
+
+	show_output
+
+	# inverted tests will expect failure instead of success
+	[ -n "$expect_failure" ] && test_result=`expr 1 - $test_result`
+	show_result $test_result "$6"
+}
+
+echo "###########################################################################"
+echo "Unicast address extensions tests (behavior of reserved IPv4 addresses)"
+echo "###########################################################################"
+#
+# Test support for 240/4
+segmenttest 240.1.2.1   240.1.2.4    24 "assign and ping within 240/4 (1 of 2) (is allowed)"
+segmenttest 250.100.2.1 250.100.30.4 16 "assign and ping within 240/4 (2 of 2) (is allowed)"
+#
+# Test support for 0/8
+segmenttest 0.1.2.17    0.1.2.23  24 "assign and ping within 0/8 (1 of 2) (is allowed)"
+segmenttest 0.77.240.17 0.77.2.23 16 "assign and ping within 0/8 (2 of 2) (is allowed)"
+#
+# Even 255.255/16 is OK!
+segmenttest 255.255.3.1 255.255.50.77 16 "assign and ping inside 255.255/16 (is allowed)"
+#
+# Or 255.255.255/24
+segmenttest 255.255.255.1 255.255.255.254 24 "assign and ping inside 255.255.255/24 (is allowed)"
+#
+# Routing between different networks
+route_test 240.5.6.7 240.5.6.1  255.1.2.1    255.1.2.3      24 "route between 240.5.6/24 and 255.1.2/24 (is allowed)"
+route_test 0.200.6.7 0.200.38.1 245.99.101.1 245.99.200.111 16 "route between 0.200/16 and 245.99/16 (is allowed)"
+#
+# Test support for lowest address ending in .0
+segmenttest 5.10.15.20 5.10.15.0 24 "assign and ping lowest address (/24)"
+#
+# Test support for lowest address not ending in .0
+segmenttest 192.168.101.192 192.168.101.193 26 "assign and ping lowest address (/26)"
+#
+# Routing using lowest address as a gateway/endpoint
+route_test 192.168.42.1 192.168.42.0 9.8.7.6 9.8.7.0 24 "routing using lowest address"
+#
+# ==============================================
+# ==== TESTS THAT CURRENTLY EXPECT FAILURE =====
+# ==============================================
+expect_failure=true
+# It should still not be possible to use 0.0.0.0 or 255.255.255.255
+# as a unicast address.  Thus, these tests expect failure.
+segmenttest 0.0.1.5       0.0.0.0         16 "assigning 0.0.0.0 (is forbidden)"
+segmenttest 255.255.255.1 255.255.255.255 16 "assigning 255.255.255.255 (is forbidden)"
+#
+# Test support for not having all of 127 be loopback
+# Currently Linux does not allow this, so this should fail too
+segmenttest 127.99.4.5 127.99.4.6 16 "assign and ping inside 127/8 (is forbidden)"
+#
+# Test support for unicast use of class D
+# Currently Linux does not allow this, so this should fail too
+segmenttest 225.1.2.3 225.1.2.200 24 "assign and ping class D address (is forbidden)"
+#
+# Routing using class D as a gateway
+route_test 225.1.42.1 225.1.42.2 9.8.7.6 9.8.7.1 24 "routing using class D (is forbidden)"
+#
+# Routing using 127/8
+# Currently Linux does not allow this, so this should fail too
+route_test 127.99.2.3 127.99.2.4 200.1.2.3 200.1.2.4 24 "routing using 127/8 (is forbidden)"
+#
+unset expect_failure
+# =====================================================
+# ==== END OF TESTS THAT CURRENTLY EXPECT FAILURE =====
+# =====================================================
+exit ${result}
diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh
new file mode 100755
index 000000000000..3a394b43e274
--- /dev/null
+++ b/tools/testing/selftests/net/veth.sh
@@ -0,0 +1,392 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+BPF_FILE="xdp_dummy.o"
+readonly STATS="$(mktemp -p /tmp ns-XXXXXX)"
+readonly BASE=`basename $STATS`
+readonly SRC=2
+readonly DST=1
+readonly DST_NAT=100
+readonly NS_SRC=$BASE$SRC
+readonly NS_DST=$BASE$DST
+
+# "baremetal" network used for raw UDP traffic
+readonly BM_NET_V4=192.168.1.
+readonly BM_NET_V6=2001:db8::
+
+readonly CPUS=`nproc`
+ret=0
+
+cleanup() {
+	local ns
+	local jobs
+	readonly jobs="$(jobs -p)"
+	[ -n "${jobs}" ] && kill -1 ${jobs} 2>/dev/null
+	rm -f $STATS
+
+	for ns in $NS_SRC $NS_DST; do
+		ip netns del $ns 2>/dev/null
+	done
+}
+
+trap cleanup EXIT
+
+create_ns() {
+	local ns
+
+	for ns in $NS_SRC $NS_DST; do
+		ip netns add $ns
+		ip -n $ns link set dev lo up
+	done
+
+	ip link add name veth$SRC type veth peer name veth$DST
+
+	for ns in $SRC $DST; do
+		ip link set dev veth$ns netns $BASE$ns up
+		ip -n $BASE$ns addr add dev veth$ns $BM_NET_V4$ns/24
+		ip -n $BASE$ns addr add dev veth$ns $BM_NET_V6$ns/64 nodad
+	done
+	echo "#kernel" > $BASE
+	chmod go-rw $BASE
+}
+
+__chk_flag() {
+	local msg="$1"
+	local target=$2
+	local expected=$3
+	local flagname=$4
+
+	local flag=`ip netns exec $BASE$target ethtool -k veth$target |\
+		    grep $flagname | awk '{print $2}'`
+
+	printf "%-60s" "$msg"
+	if [ "$flag" = "$expected" ]; then
+		echo " ok "
+	else
+		echo " fail - expected $expected found $flag"
+		ret=1
+	fi
+}
+
+chk_gro_flag() {
+	__chk_flag "$1" $2 $3 generic-receive-offload
+}
+
+chk_tso_flag() {
+	__chk_flag "$1" $2 $3 tcp-segmentation-offload
+}
+
+chk_channels() {
+	local msg="$1"
+	local target=$2
+	local rx=$3
+	local tx=$4
+
+	local dev=veth$target
+
+	local cur_rx=`ip netns exec $BASE$target ethtool -l $dev |\
+		grep RX: | tail -n 1 | awk '{print $2}' `
+		local cur_tx=`ip netns exec $BASE$target ethtool -l $dev |\
+		grep TX: | tail -n 1 | awk '{print $2}'`
+	local cur_combined=`ip netns exec $BASE$target ethtool -l $dev |\
+		grep Combined: | tail -n 1 | awk '{print $2}'`
+
+	printf "%-60s" "$msg"
+	if [ "$cur_rx" = "$rx" -a "$cur_tx" = "$tx" -a "$cur_combined" = "n/a" ]; then
+		echo " ok "
+	else
+		echo " fail rx:$rx:$cur_rx tx:$tx:$cur_tx combined:n/a:$cur_combined"
+	fi
+}
+
+chk_gro() {
+	local msg="$1"
+	local expected=$2
+
+	ip netns exec $BASE$SRC ping -qc 1 $BM_NET_V4$DST >/dev/null
+	NSTAT_HISTORY=$STATS ip netns exec $NS_DST nstat -n
+
+	printf "%-60s" "$msg"
+	ip netns exec $BASE$DST ./udpgso_bench_rx -C 1000 -R 10 &
+	local spid=$!
+	sleep 0.1
+
+	ip netns exec $NS_SRC ./udpgso_bench_tx -4 -s 13000 -S 1300 -M 1 -D $BM_NET_V4$DST
+	local retc=$?
+	wait $spid
+	local rets=$?
+	if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
+		echo " fail client exit code $retc, server $rets"
+		ret=1
+		return
+	fi
+
+	local pkts=`NSTAT_HISTORY=$STATS ip netns exec $NS_DST nstat IpInReceives | \
+		    awk '{print $2}' | tail -n 1`
+	if [ "$pkts" = "$expected" ]; then
+		echo " ok "
+	else
+		echo " fail - got $pkts packets, expected $expected "
+		ret=1
+	fi
+}
+
+__change_channels()
+{
+	local cur_cpu
+	local end=$1
+	local cur
+	local i
+
+	while true; do
+		printf -v cur '%(%s)T'
+		[ $cur -le $end ] || break
+
+		for i in `seq 1 $CPUS`; do
+			ip netns exec $NS_SRC ethtool -L veth$SRC rx $i tx $i
+			ip netns exec $NS_DST ethtool -L veth$DST rx $i tx $i
+		done
+
+		for i in `seq 1 $((CPUS - 1))`; do
+			cur_cpu=$((CPUS - $i))
+			ip netns exec $NS_SRC ethtool -L veth$SRC rx $cur_cpu tx $cur_cpu
+			ip netns exec $NS_DST ethtool -L veth$DST rx $cur_cpu tx $cur_cpu
+		done
+	done
+}
+
+__send_data() {
+	local end=$1
+
+	while true; do
+		printf -v cur '%(%s)T'
+		[ $cur -le $end ] || break
+
+		ip netns exec $NS_SRC ./udpgso_bench_tx -4 -s 1000 -M 300 -D $BM_NET_V4$DST
+	done
+}
+
+do_stress() {
+	local end
+	printf -v end '%(%s)T'
+	end=$((end + $STRESS))
+
+	ip netns exec $NS_SRC ethtool -L veth$SRC rx 3 tx 3
+	ip netns exec $NS_DST ethtool -L veth$DST rx 3 tx 3
+
+	ip netns exec $NS_DST ./udpgso_bench_rx &
+	local rx_pid=$!
+
+	echo "Running stress test for $STRESS seconds..."
+	__change_channels $end &
+	local ch_pid=$!
+	__send_data $end &
+	local data_pid_1=$!
+	__send_data $end &
+	local data_pid_2=$!
+	__send_data $end &
+	local data_pid_3=$!
+	__send_data $end &
+	local data_pid_4=$!
+
+	wait $ch_pid $data_pid_1 $data_pid_2 $data_pid_3 $data_pid_4
+	kill -9 $rx_pid
+	echo "done"
+
+	# restore previous setting
+	ip netns exec $NS_SRC ethtool -L veth$SRC rx 2 tx 2
+	ip netns exec $NS_DST ethtool -L veth$DST rx 2 tx 1
+}
+
+usage() {
+	echo "Usage: $0 [-h] [-s <seconds>]"
+	echo -e "\t-h: show this help"
+	echo -e "\t-s: run optional stress tests for the given amount of seconds"
+}
+
+STRESS=0
+while getopts "hs:" option; do
+	case "$option" in
+	"h")
+		usage $0
+		exit 0
+		;;
+	"s")
+		STRESS=$OPTARG
+		;;
+	esac
+done
+
+if [ ! -f ${BPF_FILE} ]; then
+	echo "Missing ${BPF_FILE}. Run 'make' first"
+	exit 1
+fi
+
+[ $CPUS -lt 2 ] && echo "Only one CPU available, some tests will be skipped"
+[ $STRESS -gt 0 -a $CPUS -lt 3 ] && echo " stress test will be skipped, too"
+
+create_ns
+chk_gro_flag "default - gro flag" $SRC off
+chk_gro_flag "        - peer gro flag" $DST off
+chk_tso_flag "        - tso flag" $SRC on
+chk_tso_flag "        - peer tso flag" $DST on
+chk_gro "        - aggregation" 1
+ip netns exec $NS_SRC ethtool -K veth$SRC tx-udp-segmentation off
+chk_gro "        - aggregation with TSO off" 10
+cleanup
+
+create_ns
+ip netns exec $NS_DST ethtool -K veth$DST gro on
+chk_gro_flag "with gro on - gro flag" $DST on
+chk_gro_flag "        - peer gro flag" $SRC off
+chk_tso_flag "        - tso flag" $SRC on
+chk_tso_flag "        - peer tso flag" $DST on
+ip netns exec $NS_SRC ethtool -K veth$SRC tx-udp-segmentation off
+ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on
+chk_gro "        - aggregation with TSO off" 1
+cleanup
+
+create_ns
+ip -n $NS_DST link set dev veth$DST up
+ip -n $NS_DST link set dev veth$DST xdp object ${BPF_FILE} section xdp
+chk_gro_flag "gro vs xdp while down - gro flag off" $DST off
+ip -n $NS_DST link set dev veth$DST down
+chk_gro_flag "                      - after down" $DST off
+ip -n $NS_DST link set dev veth$DST xdp off
+chk_gro_flag "                      - after xdp off" $DST off
+ip -n $NS_DST link set dev veth$DST up
+chk_gro_flag "                      - after up" $DST off
+ip -n $NS_SRC link set dev veth$SRC xdp object ${BPF_FILE} section xdp
+chk_gro_flag "                      - after peer xdp" $DST off
+cleanup
+
+create_ns
+ip -n $NS_DST link set dev veth$DST up
+ip -n $NS_DST link set dev veth$DST xdp object ${BPF_FILE} section xdp
+ip netns exec $NS_DST ethtool -K veth$DST generic-receive-offload on
+chk_gro_flag "gro vs xdp while down - gro flag on" $DST on
+ip -n $NS_DST link set dev veth$DST down
+chk_gro_flag "                      - after down" $DST on
+ip -n $NS_DST link set dev veth$DST xdp off
+chk_gro_flag "                      - after xdp off" $DST on
+ip -n $NS_DST link set dev veth$DST up
+chk_gro_flag "                      - after up" $DST on
+ip -n $NS_SRC link set dev veth$SRC xdp object ${BPF_FILE} section xdp
+chk_gro_flag "                      - after peer xdp" $DST on
+cleanup
+
+create_ns
+chk_channels "default channels" $DST 1 1
+
+ip -n $NS_DST link set dev veth$DST down
+ip netns exec $NS_DST ethtool -K veth$DST gro on
+chk_gro_flag "with gro enabled on link down - gro flag" $DST on
+chk_gro_flag "        - peer gro flag" $SRC off
+chk_tso_flag "        - tso flag" $SRC on
+chk_tso_flag "        - peer tso flag" $DST on
+ip -n $NS_DST link set dev veth$DST up
+ip netns exec $NS_SRC ethtool -K veth$SRC tx-udp-segmentation off
+ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on
+chk_gro "        - aggregation with TSO off" 1
+cleanup
+
+create_ns
+
+CUR_TX=1
+CUR_RX=1
+if [ $CPUS -gt 1 ]; then
+	ip netns exec $NS_DST ethtool -L veth$DST tx 2
+	chk_channels "setting tx channels" $DST 1 2
+	CUR_TX=2
+fi
+
+if [ $CPUS -gt 2 ]; then
+	ip netns exec $NS_DST ethtool -L veth$DST rx 3 tx 3
+	chk_channels "setting both rx and tx channels" $DST 3 3
+	CUR_RX=3
+	CUR_TX=3
+fi
+
+ip netns exec $NS_DST ethtool -L veth$DST combined 2 2>/dev/null
+chk_channels "bad setting: combined channels" $DST $CUR_RX $CUR_TX
+
+ip netns exec $NS_DST ethtool -L veth$DST tx $((CPUS + 1)) 2>/dev/null
+chk_channels "setting invalid channels nr" $DST $CUR_RX $CUR_TX
+
+if [ $CPUS -gt 1 ]; then
+	# this also tests queues nr reduction
+	ip netns exec $NS_DST ethtool -L veth$DST rx 1 tx 2 2>/dev/null
+	ip netns exec $NS_SRC ethtool -L veth$SRC rx 1 tx 2 2>/dev/null
+	printf "%-60s" "bad setting: XDP with RX nr less than TX"
+	ip -n $NS_DST link set dev veth$DST xdp object ${BPF_FILE} \
+		section xdp 2>/dev/null &&\
+		echo "fail - set operation successful ?!?" || echo " ok "
+
+	# the following tests will run with multiple channels active
+	ip netns exec $NS_SRC ethtool -L veth$SRC rx 2
+	ip netns exec $NS_DST ethtool -L veth$DST rx 2
+	ip -n $NS_DST link set dev veth$DST xdp object ${BPF_FILE} \
+		section xdp 2>/dev/null
+	printf "%-60s" "bad setting: reducing RX nr below peer TX with XDP set"
+	ip netns exec $NS_DST ethtool -L veth$DST rx 1 2>/dev/null &&\
+		echo "fail - set operation successful ?!?" || echo " ok "
+	CUR_RX=2
+	CUR_TX=2
+fi
+
+if [ $CPUS -gt 2 ]; then
+	printf "%-60s" "bad setting: increasing peer TX nr above RX with XDP set"
+	ip netns exec $NS_SRC ethtool -L veth$SRC tx 3 2>/dev/null &&\
+		echo "fail - set operation successful ?!?" || echo " ok "
+	chk_channels "setting invalid channels nr" $DST 2 2
+fi
+
+ip -n $NS_DST link set dev veth$DST xdp object ${BPF_FILE} section xdp 2>/dev/null
+chk_gro_flag "with xdp attached - gro flag" $DST off
+chk_gro_flag "        - peer gro flag" $SRC off
+chk_tso_flag "        - tso flag" $SRC off
+chk_tso_flag "        - peer tso flag" $DST on
+ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on
+chk_gro "        - no aggregation" 10
+ip netns exec $NS_DST ethtool -K veth$DST generic-receive-offload on
+chk_gro_flag "        - gro flag with GRO on" $DST on
+chk_gro "        - aggregation" 1
+
+
+ip -n $NS_DST link set dev veth$DST down
+ip -n $NS_SRC link set dev veth$SRC down
+chk_gro_flag "        - after dev off, flag" $DST on
+chk_gro_flag "        - peer flag" $SRC off
+
+ip netns exec $NS_DST ethtool -K veth$DST gro on
+ip -n $NS_DST link set dev veth$DST xdp off
+chk_gro_flag "        - after gro on xdp off, gro flag" $DST on
+chk_gro_flag "        - peer gro flag" $SRC off
+chk_tso_flag "        - tso flag" $SRC on
+chk_tso_flag "        - peer tso flag" $DST on
+
+if [ $CPUS -gt 1 ]; then
+	ip netns exec $NS_DST ethtool -L veth$DST tx 1
+	chk_channels "decreasing tx channels with device down" $DST 2 1
+fi
+
+ip -n $NS_DST link set dev veth$DST up
+ip -n $NS_SRC link set dev veth$SRC up
+chk_gro "        - aggregation" 1
+
+if [ $CPUS -gt 1 ]; then
+	[ $STRESS -gt 0 -a $CPUS -gt 2 ] && do_stress
+
+	ip -n $NS_DST link set dev veth$DST down
+	ip -n $NS_SRC link set dev veth$SRC down
+	ip netns exec $NS_DST ethtool -L veth$DST tx 2
+	chk_channels "increasing tx channels with device down" $DST 2 2
+	ip -n $NS_DST link set dev veth$DST up
+	ip -n $NS_SRC link set dev veth$SRC up
+fi
+
+ip netns exec $NS_DST ethtool -K veth$DST gro off
+ip netns exec $NS_SRC ethtool -K veth$SRC tx-udp-segmentation off
+chk_gro "aggregation again with default and TSO off" 10
+
+exit $ret
diff --git a/tools/testing/selftests/net/vlan_hw_filter.sh b/tools/testing/selftests/net/vlan_hw_filter.sh
new file mode 100755
index 000000000000..7bc804ffaf7c
--- /dev/null
+++ b/tools/testing/selftests/net/vlan_hw_filter.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+readonly NETNS="ns-$(mktemp -u XXXXXX)"
+
+ret=0
+
+cleanup() {
+	ip netns del $NETNS
+}
+
+trap cleanup EXIT
+
+fail() {
+    echo "ERROR: ${1:-unexpected return code} (ret: $_)" >&2
+    ret=1
+}
+
+ip netns add ${NETNS}
+ip netns exec ${NETNS} ip link add bond0 type bond mode 0
+ip netns exec ${NETNS} ip link add bond_slave_1 type veth peer veth2
+ip netns exec ${NETNS} ip link set bond_slave_1 master bond0
+ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off
+ip netns exec ${NETNS} ip link add link bond_slave_1 name bond_slave_1.0 type vlan id 0
+ip netns exec ${NETNS} ip link add link bond0 name bond0.0 type vlan id 0
+ip netns exec ${NETNS} ip link set bond_slave_1 nomaster
+ip netns exec ${NETNS} ip link del veth2 || fail "Please check vlan HW filter function"
+
+exit $ret
diff --git a/tools/testing/selftests/net/vrf-xfrm-tests.sh b/tools/testing/selftests/net/vrf-xfrm-tests.sh
index 184da81f554f..b64dd891699d 100755
--- a/tools/testing/selftests/net/vrf-xfrm-tests.sh
+++ b/tools/testing/selftests/net/vrf-xfrm-tests.sh
@@ -3,9 +3,7 @@
 #
 # Various combinations of VRF with xfrms and qdisc.
 
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-
+source lib.sh
 PAUSE_ON_FAIL=no
 VERBOSE=0
 ret=0
@@ -67,7 +65,7 @@ run_cmd_host1()
 		printf "    COMMAND: $cmd\n"
 	fi
 
-	out=$(eval ip netns exec host1 $cmd 2>&1)
+	out=$(eval ip netns exec $host1 $cmd 2>&1)
 	rc=$?
 	if [ "$VERBOSE" = "1" ]; then
 		if [ -n "$out" ]; then
@@ -116,9 +114,6 @@ create_ns()
 	[ -z "${addr}" ] && addr="-"
 	[ -z "${addr6}" ] && addr6="-"
 
-	ip netns add ${ns}
-
-	ip -netns ${ns} link set lo up
 	if [ "${addr}" != "-" ]; then
 		ip -netns ${ns} addr add dev lo ${addr}
 	fi
@@ -177,25 +172,25 @@ connect_ns()
 
 cleanup()
 {
-	ip netns del host1
-	ip netns del host2
+	cleanup_ns $host1 $host2
 }
 
 setup()
 {
-	create_ns "host1"
-	create_ns "host2"
+	setup_ns host1 host2
+	create_ns "$host1"
+	create_ns "$host2"
 
-	connect_ns "host1" eth0 ${HOST1_4}/24 ${HOST1_6}/64 \
-	           "host2" eth0 ${HOST2_4}/24 ${HOST2_6}/64
+	connect_ns "$host1" eth0 ${HOST1_4}/24 ${HOST1_6}/64 \
+	           "$host2" eth0 ${HOST2_4}/24 ${HOST2_6}/64
 
-	create_vrf "host1" ${VRF} ${TABLE}
-	ip -netns host1 link set dev eth0 master ${VRF}
+	create_vrf "$host1" ${VRF} ${TABLE}
+	ip -netns $host1 link set dev eth0 master ${VRF}
 }
 
 cleanup_xfrm()
 {
-	for ns in host1 host2
+	for ns in $host1 $host2
 	do
 		for x in state policy
 		do
@@ -218,127 +213,127 @@ setup_xfrm()
 	#
 
 	# host1 - IPv4 out
-	ip -netns host1 xfrm policy add \
+	ip -netns $host1 xfrm policy add \
 	  src ${h1_4} dst ${h2_4} ${devarg} dir out \
 	  tmpl src ${HOST1_4} dst ${HOST2_4} proto esp mode tunnel
 
 	# host2 - IPv4 in
-	ip -netns host2 xfrm policy add \
+	ip -netns $host2 xfrm policy add \
 	  src ${h1_4} dst ${h2_4} dir in \
 	  tmpl src ${HOST1_4} dst ${HOST2_4} proto esp mode tunnel
 
 	# host1 - IPv4 in
-	ip -netns host1 xfrm policy add \
+	ip -netns $host1 xfrm policy add \
 	  src ${h2_4} dst ${h1_4} ${devarg} dir in \
 	  tmpl src ${HOST2_4} dst ${HOST1_4} proto esp mode tunnel
 
 	# host2 - IPv4 out
-	ip -netns host2 xfrm policy add \
+	ip -netns $host2 xfrm policy add \
 	  src ${h2_4} dst ${h1_4} dir out \
 	  tmpl src ${HOST2_4} dst ${HOST1_4} proto esp mode tunnel
 
 
 	# host1 - IPv6 out
-	ip -6 -netns host1 xfrm policy add \
+	ip -6 -netns $host1 xfrm policy add \
 	  src ${h1_6} dst ${h2_6} ${devarg} dir out \
 	  tmpl src ${HOST1_6} dst ${HOST2_6} proto esp mode tunnel
 
 	# host2 - IPv6 in
-	ip -6 -netns host2 xfrm policy add \
+	ip -6 -netns $host2 xfrm policy add \
 	  src ${h1_6} dst ${h2_6} dir in \
 	  tmpl src ${HOST1_6} dst ${HOST2_6} proto esp mode tunnel
 
 	# host1 - IPv6 in
-	ip -6 -netns host1 xfrm policy add \
+	ip -6 -netns $host1 xfrm policy add \
 	  src ${h2_6} dst ${h1_6} ${devarg} dir in \
 	  tmpl src ${HOST2_6} dst ${HOST1_6} proto esp mode tunnel
 
 	# host2 - IPv6 out
-	ip -6 -netns host2 xfrm policy add \
+	ip -6 -netns $host2 xfrm policy add \
 	  src ${h2_6} dst ${h1_6} dir out \
 	  tmpl src ${HOST2_6} dst ${HOST1_6} proto esp mode tunnel
 
 	#
 	# state
 	#
-	ip -netns host1 xfrm state add src ${HOST1_4} dst ${HOST2_4} \
+	ip -netns $host1 xfrm state add src ${HOST1_4} dst ${HOST2_4} \
 	    proto esp spi ${SPI_1} reqid 0 mode tunnel \
 	    replay-window 4 replay-oseq 0x4 \
-	    auth-trunc 'hmac(md5)' ${AUTH_1} 96 \
-	    enc 'cbc(des3_ede)' ${ENC_1} \
+	    auth-trunc 'hmac(sha1)' ${AUTH_1} 96 \
+	    enc 'cbc(aes)' ${ENC_1} \
 	    sel src ${h1_4} dst ${h2_4} ${devarg}
 
-	ip -netns host2 xfrm state add src ${HOST1_4} dst ${HOST2_4} \
+	ip -netns $host2 xfrm state add src ${HOST1_4} dst ${HOST2_4} \
 	    proto esp spi ${SPI_1} reqid 0 mode tunnel \
 	    replay-window 4 replay-oseq 0x4 \
-	    auth-trunc 'hmac(md5)' ${AUTH_1} 96 \
-	    enc 'cbc(des3_ede)' ${ENC_1} \
+	    auth-trunc 'hmac(sha1)' ${AUTH_1} 96 \
+	    enc 'cbc(aes)' ${ENC_1} \
 	    sel src ${h1_4} dst ${h2_4}
 
 
-	ip -netns host1 xfrm state add src ${HOST2_4} dst ${HOST1_4} \
+	ip -netns $host1 xfrm state add src ${HOST2_4} dst ${HOST1_4} \
 	    proto esp spi ${SPI_2} reqid 0 mode tunnel \
 	    replay-window 4 replay-oseq 0x4 \
-	    auth-trunc 'hmac(md5)' ${AUTH_2} 96 \
-	    enc 'cbc(des3_ede)' ${ENC_2} \
+	    auth-trunc 'hmac(sha1)' ${AUTH_2} 96 \
+	    enc 'cbc(aes)' ${ENC_2} \
 	    sel src ${h2_4} dst ${h1_4} ${devarg}
 
-	ip -netns host2 xfrm state add src ${HOST2_4} dst ${HOST1_4} \
+	ip -netns $host2 xfrm state add src ${HOST2_4} dst ${HOST1_4} \
 	    proto esp spi ${SPI_2} reqid 0 mode tunnel \
 	    replay-window 4 replay-oseq 0x4 \
-	    auth-trunc 'hmac(md5)' ${AUTH_2} 96 \
-	    enc 'cbc(des3_ede)' ${ENC_2} \
+	    auth-trunc 'hmac(sha1)' ${AUTH_2} 96 \
+	    enc 'cbc(aes)' ${ENC_2} \
 	    sel src ${h2_4} dst ${h1_4}
 
 
-	ip -6 -netns host1 xfrm state add src ${HOST1_6} dst ${HOST2_6} \
+	ip -6 -netns $host1 xfrm state add src ${HOST1_6} dst ${HOST2_6} \
 	    proto esp spi ${SPI_1} reqid 0 mode tunnel \
 	    replay-window 4 replay-oseq 0x4 \
-	    auth-trunc 'hmac(md5)' ${AUTH_1} 96 \
-	    enc 'cbc(des3_ede)' ${ENC_1} \
+	    auth-trunc 'hmac(sha1)' ${AUTH_1} 96 \
+	    enc 'cbc(aes)' ${ENC_1} \
 	    sel src ${h1_6} dst ${h2_6} ${devarg}
 
-	ip -6 -netns host2 xfrm state add src ${HOST1_6} dst ${HOST2_6} \
+	ip -6 -netns $host2 xfrm state add src ${HOST1_6} dst ${HOST2_6} \
 	    proto esp spi ${SPI_1} reqid 0 mode tunnel \
 	    replay-window 4 replay-oseq 0x4 \
-	    auth-trunc 'hmac(md5)' ${AUTH_1} 96 \
-	    enc 'cbc(des3_ede)' ${ENC_1} \
+	    auth-trunc 'hmac(sha1)' ${AUTH_1} 96 \
+	    enc 'cbc(aes)' ${ENC_1} \
 	    sel src ${h1_6} dst ${h2_6}
 
 
-	ip -6 -netns host1 xfrm state add src ${HOST2_6} dst ${HOST1_6} \
+	ip -6 -netns $host1 xfrm state add src ${HOST2_6} dst ${HOST1_6} \
 	    proto esp spi ${SPI_2} reqid 0 mode tunnel \
 	    replay-window 4 replay-oseq 0x4 \
-	    auth-trunc 'hmac(md5)' ${AUTH_2} 96 \
-	    enc 'cbc(des3_ede)' ${ENC_2} \
+	    auth-trunc 'hmac(sha1)' ${AUTH_2} 96 \
+	    enc 'cbc(aes)' ${ENC_2} \
 	    sel src ${h2_6} dst ${h1_6} ${devarg}
 
-	ip -6 -netns host2 xfrm state add src ${HOST2_6} dst ${HOST1_6} \
+	ip -6 -netns $host2 xfrm state add src ${HOST2_6} dst ${HOST1_6} \
 	    proto esp spi ${SPI_2} reqid 0 mode tunnel \
 	    replay-window 4 replay-oseq 0x4 \
-	    auth-trunc 'hmac(md5)' ${AUTH_2} 96 \
-	    enc 'cbc(des3_ede)' ${ENC_2} \
+	    auth-trunc 'hmac(sha1)' ${AUTH_2} 96 \
+	    enc 'cbc(aes)' ${ENC_2} \
 	    sel src ${h2_6} dst ${h1_6}
 }
 
 cleanup_xfrm_dev()
 {
-	ip -netns host1 li del xfrm0
-	ip -netns host2 addr del ${XFRM2_4}/24 dev eth0
-	ip -netns host2 addr del ${XFRM2_6}/64 dev eth0
+	ip -netns $host1 li del xfrm0
+	ip -netns $host2 addr del ${XFRM2_4}/24 dev eth0
+	ip -netns $host2 addr del ${XFRM2_6}/64 dev eth0
 }
 
 setup_xfrm_dev()
 {
 	local vrfarg="vrf ${VRF}"
 
-	ip -netns host1 li add type xfrm dev eth0 if_id ${IF_ID}
-	ip -netns host1 li set xfrm0 ${vrfarg} up
-	ip -netns host1 addr add ${XFRM1_4}/24 dev xfrm0
-	ip -netns host1 addr add ${XFRM1_6}/64 dev xfrm0
+	ip -netns $host1 li add type xfrm dev eth0 if_id ${IF_ID}
+	ip -netns $host1 li set xfrm0 ${vrfarg} up
+	ip -netns $host1 addr add ${XFRM1_4}/24 dev xfrm0
+	ip -netns $host1 addr add ${XFRM1_6}/64 dev xfrm0
 
-	ip -netns host2 addr add ${XFRM2_4}/24 dev eth0
-	ip -netns host2 addr add ${XFRM2_6}/64 dev eth0
+	ip -netns $host2 addr add ${XFRM2_4}/24 dev eth0
+	ip -netns $host2 addr add ${XFRM2_6}/64 dev eth0
 
 	setup_xfrm ${XFRM1_4} ${XFRM2_4} ${XFRM1_6} ${XFRM2_6} "if_id ${IF_ID}"
 }
diff --git a/tools/testing/selftests/net/vrf_route_leaking.sh b/tools/testing/selftests/net/vrf_route_leaking.sh
index 23cf924754a5..2da32f4c479b 100755
--- a/tools/testing/selftests/net/vrf_route_leaking.sh
+++ b/tools/testing/selftests/net/vrf_route_leaking.sh
@@ -58,6 +58,7 @@
 # to send an ICMP error back to the source when the ttl of a packet reaches 1
 # while it is forwarded between different vrfs.
 
+source lib.sh
 VERBOSE=0
 PAUSE_ON_FAIL=no
 DEFAULT_TTYPE=sym
@@ -171,11 +172,7 @@ run_cmd_grep()
 
 cleanup()
 {
-	local ns
-
-	for ns in h1 h2 r1 r2; do
-		ip netns del $ns 2>/dev/null
-	done
+	cleanup_ns $h1 $h2 $r1 $r2
 }
 
 setup_vrf()
@@ -212,72 +209,69 @@ setup_sym()
 
 	#
 	# create nodes as namespaces
-	#
-	for ns in h1 h2 r1; do
-		ip netns add $ns
-		ip -netns $ns link set lo up
-
-		case "${ns}" in
-		h[12]) ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0
-		       ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1
-			;;
-		r1)    ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
-		       ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
-		esac
+	setup_ns h1 h2 r1
+	for ns in $h1 $h2 $r1; do
+		if echo $ns | grep -q h[12]-; then
+			ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0
+			ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1
+		else
+			ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
+			ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
+		fi
 	done
 
 	#
 	# create interconnects
 	#
-	ip -netns h1 link add eth0 type veth peer name r1h1
-	ip -netns h1 link set r1h1 netns r1 name eth0 up
+	ip -netns $h1 link add eth0 type veth peer name r1h1
+	ip -netns $h1 link set r1h1 netns $r1 name eth0 up
 
-	ip -netns h2 link add eth0 type veth peer name r1h2
-	ip -netns h2 link set r1h2 netns r1 name eth1 up
+	ip -netns $h2 link add eth0 type veth peer name r1h2
+	ip -netns $h2 link set r1h2 netns $r1 name eth1 up
 
 	#
 	# h1
 	#
-	ip -netns h1 addr add dev eth0 ${H1_N1_IP}/24
-	ip -netns h1 -6 addr add dev eth0 ${H1_N1_IP6}/64 nodad
-	ip -netns h1 link set eth0 up
+	ip -netns $h1 addr add dev eth0 ${H1_N1_IP}/24
+	ip -netns $h1 -6 addr add dev eth0 ${H1_N1_IP6}/64 nodad
+	ip -netns $h1 link set eth0 up
 
 	# h1 to h2 via r1
-	ip -netns h1    route add ${H2_N2} via ${R1_N1_IP} dev eth0
-	ip -netns h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev eth0
+	ip -netns $h1    route add ${H2_N2} via ${R1_N1_IP} dev eth0
+	ip -netns $h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev eth0
 
 	#
 	# h2
 	#
-	ip -netns h2 addr add dev eth0 ${H2_N2_IP}/24
-	ip -netns h2 -6 addr add dev eth0 ${H2_N2_IP6}/64 nodad
-	ip -netns h2 link set eth0 up
+	ip -netns $h2 addr add dev eth0 ${H2_N2_IP}/24
+	ip -netns $h2 -6 addr add dev eth0 ${H2_N2_IP6}/64 nodad
+	ip -netns $h2 link set eth0 up
 
 	# h2 to h1 via r1
-	ip -netns h2 route add default via ${R1_N2_IP} dev eth0
-	ip -netns h2 -6 route add default via ${R1_N2_IP6} dev eth0
+	ip -netns $h2 route add default via ${R1_N2_IP} dev eth0
+	ip -netns $h2 -6 route add default via ${R1_N2_IP6} dev eth0
 
 	#
 	# r1
 	#
-	setup_vrf r1
-	create_vrf r1 blue 1101
-	create_vrf r1 red 1102
-	ip -netns r1 link set mtu 1400 dev eth1
-	ip -netns r1 link set eth0 vrf blue up
-	ip -netns r1 link set eth1 vrf red up
-	ip -netns r1 addr add dev eth0 ${R1_N1_IP}/24
-	ip -netns r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad
-	ip -netns r1 addr add dev eth1 ${R1_N2_IP}/24
-	ip -netns r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad
+	setup_vrf $r1
+	create_vrf $r1 blue 1101
+	create_vrf $r1 red 1102
+	ip -netns $r1 link set mtu 1400 dev eth1
+	ip -netns $r1 link set eth0 vrf blue up
+	ip -netns $r1 link set eth1 vrf red up
+	ip -netns $r1 addr add dev eth0 ${R1_N1_IP}/24
+	ip -netns $r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad
+	ip -netns $r1 addr add dev eth1 ${R1_N2_IP}/24
+	ip -netns $r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad
 
 	# Route leak from blue to red
-	ip -netns r1 route add vrf blue ${H2_N2} dev red
-	ip -netns r1 -6 route add vrf blue ${H2_N2_6} dev red
+	ip -netns $r1 route add vrf blue ${H2_N2} dev red
+	ip -netns $r1 -6 route add vrf blue ${H2_N2_6} dev red
 
 	# Route leak from red to blue
-	ip -netns r1 route add vrf red ${H1_N1} dev blue
-	ip -netns r1 -6 route add vrf red ${H1_N1_6} dev blue
+	ip -netns $r1 route add vrf red ${H1_N1} dev blue
+	ip -netns $r1 -6 route add vrf red ${H1_N1_6} dev blue
 
 
 	# Wait for ip config to settle
@@ -293,90 +287,87 @@ setup_asym()
 
 	#
 	# create nodes as namespaces
-	#
-	for ns in h1 h2 r1 r2; do
-		ip netns add $ns
-		ip -netns $ns link set lo up
-
-		case "${ns}" in
-		h[12]) ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0
-		       ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1
-			;;
-		r[12]) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
-		       ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
-		esac
+	setup_ns h1 h2 r1 r2
+	for ns in $h1 $h2 $r1 $r2; do
+		if echo $ns | grep -q h[12]-; then
+			ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0
+			ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1
+		else
+			ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
+			ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
+		fi
 	done
 
 	#
 	# create interconnects
 	#
-	ip -netns h1 link add eth0 type veth peer name r1h1
-	ip -netns h1 link set r1h1 netns r1 name eth0 up
+	ip -netns $h1 link add eth0 type veth peer name r1h1
+	ip -netns $h1 link set r1h1 netns $r1 name eth0 up
 
-	ip -netns h1 link add eth1 type veth peer name r2h1
-	ip -netns h1 link set r2h1 netns r2 name eth0 up
+	ip -netns $h1 link add eth1 type veth peer name r2h1
+	ip -netns $h1 link set r2h1 netns $r2 name eth0 up
 
-	ip -netns h2 link add eth0 type veth peer name r1h2
-	ip -netns h2 link set r1h2 netns r1 name eth1 up
+	ip -netns $h2 link add eth0 type veth peer name r1h2
+	ip -netns $h2 link set r1h2 netns $r1 name eth1 up
 
-	ip -netns h2 link add eth1 type veth peer name r2h2
-	ip -netns h2 link set r2h2 netns r2 name eth1 up
+	ip -netns $h2 link add eth1 type veth peer name r2h2
+	ip -netns $h2 link set r2h2 netns $r2 name eth1 up
 
 	#
 	# h1
 	#
-	ip -netns h1 link add br0 type bridge
-	ip -netns h1 link set br0 up
-	ip -netns h1 addr add dev br0 ${H1_N1_IP}/24
-	ip -netns h1 -6 addr add dev br0 ${H1_N1_IP6}/64 nodad
-	ip -netns h1 link set eth0 master br0 up
-	ip -netns h1 link set eth1 master br0 up
+	ip -netns $h1 link add br0 type bridge
+	ip -netns $h1 link set br0 up
+	ip -netns $h1 addr add dev br0 ${H1_N1_IP}/24
+	ip -netns $h1 -6 addr add dev br0 ${H1_N1_IP6}/64 nodad
+	ip -netns $h1 link set eth0 master br0 up
+	ip -netns $h1 link set eth1 master br0 up
 
 	# h1 to h2 via r1
-	ip -netns h1    route add ${H2_N2} via ${R1_N1_IP} dev br0
-	ip -netns h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev br0
+	ip -netns $h1    route add ${H2_N2} via ${R1_N1_IP} dev br0
+	ip -netns $h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev br0
 
 	#
 	# h2
 	#
-	ip -netns h2 link add br0 type bridge
-	ip -netns h2 link set br0 up
-	ip -netns h2 addr add dev br0 ${H2_N2_IP}/24
-	ip -netns h2 -6 addr add dev br0 ${H2_N2_IP6}/64 nodad
-	ip -netns h2 link set eth0 master br0 up
-	ip -netns h2 link set eth1 master br0 up
+	ip -netns $h2 link add br0 type bridge
+	ip -netns $h2 link set br0 up
+	ip -netns $h2 addr add dev br0 ${H2_N2_IP}/24
+	ip -netns $h2 -6 addr add dev br0 ${H2_N2_IP6}/64 nodad
+	ip -netns $h2 link set eth0 master br0 up
+	ip -netns $h2 link set eth1 master br0 up
 
 	# h2 to h1 via r2
-	ip -netns h2 route add default via ${R2_N2_IP} dev br0
-	ip -netns h2 -6 route add default via ${R2_N2_IP6} dev br0
+	ip -netns $h2 route add default via ${R2_N2_IP} dev br0
+	ip -netns $h2 -6 route add default via ${R2_N2_IP6} dev br0
 
 	#
 	# r1
 	#
-	setup_vrf r1
-	create_vrf r1 blue 1101
-	create_vrf r1 red 1102
-	ip -netns r1 link set mtu 1400 dev eth1
-	ip -netns r1 link set eth0 vrf blue up
-	ip -netns r1 link set eth1 vrf red up
-	ip -netns r1 addr add dev eth0 ${R1_N1_IP}/24
-	ip -netns r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad
-	ip -netns r1 addr add dev eth1 ${R1_N2_IP}/24
-	ip -netns r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad
+	setup_vrf $r1
+	create_vrf $r1 blue 1101
+	create_vrf $r1 red 1102
+	ip -netns $r1 link set mtu 1400 dev eth1
+	ip -netns $r1 link set eth0 vrf blue up
+	ip -netns $r1 link set eth1 vrf red up
+	ip -netns $r1 addr add dev eth0 ${R1_N1_IP}/24
+	ip -netns $r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad
+	ip -netns $r1 addr add dev eth1 ${R1_N2_IP}/24
+	ip -netns $r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad
 
 	# Route leak from blue to red
-	ip -netns r1 route add vrf blue ${H2_N2} dev red
-	ip -netns r1 -6 route add vrf blue ${H2_N2_6} dev red
+	ip -netns $r1 route add vrf blue ${H2_N2} dev red
+	ip -netns $r1 -6 route add vrf blue ${H2_N2_6} dev red
 
 	# No route leak from red to blue
 
 	#
 	# r2
 	#
-	ip -netns r2 addr add dev eth0 ${R2_N1_IP}/24
-	ip -netns r2 -6 addr add dev eth0 ${R2_N1_IP6}/64 nodad
-	ip -netns r2 addr add dev eth1 ${R2_N2_IP}/24
-	ip -netns r2 -6 addr add dev eth1 ${R2_N2_IP6}/64 nodad
+	ip -netns $r2 addr add dev eth0 ${R2_N1_IP}/24
+	ip -netns $r2 -6 addr add dev eth0 ${R2_N1_IP6}/64 nodad
+	ip -netns $r2 addr add dev eth1 ${R2_N2_IP}/24
+	ip -netns $r2 -6 addr add dev eth1 ${R2_N2_IP6}/64 nodad
 
 	# Wait for ip config to settle
 	sleep 2
@@ -384,14 +375,14 @@ setup_asym()
 
 check_connectivity()
 {
-	ip netns exec h1 ping -c1 -w1 ${H2_N2_IP} >/dev/null 2>&1
+	ip netns exec $h1 ping -c1 -w1 ${H2_N2_IP} >/dev/null 2>&1
 	log_test $? 0 "Basic IPv4 connectivity"
 	return $?
 }
 
 check_connectivity6()
 {
-	ip netns exec h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1
+	ip netns exec $h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1
 	log_test $? 0 "Basic IPv6 connectivity"
 	return $?
 }
@@ -426,7 +417,7 @@ ipv4_traceroute()
 
 	check_connectivity || return
 
-	run_cmd_grep "${R1_N1_IP}" ip netns exec h1 traceroute ${H2_N2_IP}
+	run_cmd_grep "${R1_N1_IP}" ip netns exec $h1 traceroute ${H2_N2_IP}
 	log_test $? 0 "Traceroute reports a hop on r1"
 }
 
@@ -449,7 +440,7 @@ ipv6_traceroute()
 
 	check_connectivity6 || return
 
-	run_cmd_grep "${R1_N1_IP6}" ip netns exec h1 traceroute6 ${H2_N2_IP6}
+	run_cmd_grep "${R1_N1_IP6}" ip netns exec $h1 traceroute6 ${H2_N2_IP6}
 	log_test $? 0 "Traceroute6 reports a hop on r1"
 }
 
@@ -470,7 +461,7 @@ ipv4_ping_ttl()
 
 	check_connectivity || return
 
-	run_cmd_grep "Time to live exceeded" ip netns exec h1 ping -t1 -c1 -W2 ${H2_N2_IP}
+	run_cmd_grep "Time to live exceeded" ip netns exec $h1 ping -t1 -c1 -W2 ${H2_N2_IP}
 	log_test $? 0 "Ping received ICMP ttl exceeded"
 }
 
@@ -491,7 +482,7 @@ ipv4_ping_frag()
 
 	check_connectivity || return
 
-	run_cmd_grep "Frag needed" ip netns exec h1 ping -s 1450 -Mdo -c1 -W2 ${H2_N2_IP}
+	run_cmd_grep "Frag needed" ip netns exec $h1 ping -s 1450 -Mdo -c1 -W2 ${H2_N2_IP}
 	log_test $? 0 "Ping received ICMP Frag needed"
 }
 
@@ -512,7 +503,7 @@ ipv6_ping_ttl()
 
 	check_connectivity6 || return
 
-	run_cmd_grep "Time exceeded: Hop limit" ip netns exec h1 "${ping6}" -t1 -c1 -W2 ${H2_N2_IP6}
+	run_cmd_grep "Time exceeded: Hop limit" ip netns exec $h1 "${ping6}" -t1 -c1 -W2 ${H2_N2_IP6}
 	log_test $? 0 "Ping received ICMP Hop limit"
 }
 
@@ -533,7 +524,7 @@ ipv6_ping_frag()
 
 	check_connectivity6 || return
 
-	run_cmd_grep "Packet too big" ip netns exec h1 "${ping6}" -s 1450 -Mdo -c1 -W2 ${H2_N2_IP6}
+	run_cmd_grep "Packet too big" ip netns exec $h1 "${ping6}" -s 1450 -Mdo -c1 -W2 ${H2_N2_IP6}
 	log_test $? 0 "Ping received ICMP Packet too big"
 }
 
@@ -565,7 +556,7 @@ EOF
 command -v ping6 > /dev/null 2>&1 && ping6=$(command -v ping6) || ping6=$(command -v ping)
 
 TESTS_IPV4="ipv4_ping_ttl ipv4_traceroute ipv4_ping_frag ipv4_ping_ttl_asym ipv4_traceroute_asym"
-TESTS_IPV6="ipv6_ping_ttl ipv6_traceroute ipv6_ping_frag ipv6_ping_ttl_asym ipv6_traceroute_asym"
+TESTS_IPV6="ipv6_ping_ttl ipv6_traceroute ipv6_ping_ttl_asym ipv6_traceroute_asym"
 
 ret=0
 nsuccess=0
diff --git a/tools/testing/selftests/net/vrf_strict_mode_test.sh b/tools/testing/selftests/net/vrf_strict_mode_test.sh
index 18b982d611de..01552b542544 100755
--- a/tools/testing/selftests/net/vrf_strict_mode_test.sh
+++ b/tools/testing/selftests/net/vrf_strict_mode_test.sh
@@ -3,6 +3,7 @@
 
 # This test is designed for testing the new VRF strict_mode functionality.
 
+source lib.sh
 ret=0
 
 # identifies the "init" network namespace which is often called root network
@@ -11,6 +12,8 @@ INIT_NETNS_NAME="init"
 
 PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
 
+TESTS="init testns mix"
+
 log_test()
 {
 	local rc=$1
@@ -242,13 +245,12 @@ setup()
 {
 	modprobe vrf
 
-	ip netns add testns
-	ip netns exec testns ip link set lo up
+	setup_ns testns
 }
 
 cleanup()
 {
-	ip netns del testns 2>/dev/null
+	ip netns del $testns 2>/dev/null
 
 	ip link del vrf100 2>/dev/null
 	ip link del vrf101 2>/dev/null
@@ -259,6 +261,8 @@ cleanup()
 
 vrf_strict_mode_tests_init()
 {
+	log_section "VRF strict_mode test on init network namespace"
+
 	vrf_strict_mode_check_support init
 
 	strict_mode_check_default init
@@ -289,67 +293,83 @@ vrf_strict_mode_tests_init()
 
 vrf_strict_mode_tests_testns()
 {
-	vrf_strict_mode_check_support testns
+	log_section "VRF strict_mode test on testns network namespace"
 
-	strict_mode_check_default testns
+	vrf_strict_mode_check_support $testns
 
-	enable_strict_mode_and_check testns
+	strict_mode_check_default $testns
 
-	add_vrf_and_check testns vrf100 100
-	config_vrf_and_check testns 10.0.100.1/24 vrf100
+	enable_strict_mode_and_check $testns
 
-	add_vrf_and_check_fail testns vrf101 100
+	add_vrf_and_check $testns vrf100 100
+	config_vrf_and_check $testns 10.0.100.1/24 vrf100
 
-	add_vrf_and_check_fail testns vrf102 100
+	add_vrf_and_check_fail $testns vrf101 100
 
-	add_vrf_and_check testns vrf200 200
+	add_vrf_and_check_fail $testns vrf102 100
 
-	disable_strict_mode_and_check testns
+	add_vrf_and_check $testns vrf200 200
 
-	add_vrf_and_check testns vrf101 100
+	disable_strict_mode_and_check $testns
 
-	add_vrf_and_check testns vrf102 100
+	add_vrf_and_check $testns vrf101 100
 
-	#the strict_mode is disabled in the testns
+	add_vrf_and_check $testns vrf102 100
+
+	#the strict_mode is disabled in the $testns
 }
 
 vrf_strict_mode_tests_mix()
 {
+	log_section "VRF strict_mode test mixing init and testns network namespaces"
+
 	read_strict_mode_compare_and_check init 1
 
-	read_strict_mode_compare_and_check testns 0
+	read_strict_mode_compare_and_check $testns 0
 
-	del_vrf_and_check testns vrf101
+	del_vrf_and_check $testns vrf101
 
-	del_vrf_and_check testns vrf102
+	del_vrf_and_check $testns vrf102
 
 	disable_strict_mode_and_check init
 
-	enable_strict_mode_and_check testns
+	enable_strict_mode_and_check $testns
 
 	enable_strict_mode_and_check init
 	enable_strict_mode_and_check init
 
-	disable_strict_mode_and_check testns
-	disable_strict_mode_and_check testns
+	disable_strict_mode_and_check $testns
+	disable_strict_mode_and_check $testns
 
 	read_strict_mode_compare_and_check init 1
 
-	read_strict_mode_compare_and_check testns 0
+	read_strict_mode_compare_and_check $testns 0
 }
 
-vrf_strict_mode_tests()
-{
-	log_section "VRF strict_mode test on init network namespace"
-	vrf_strict_mode_tests_init
+################################################################################
+# usage
 
-	log_section "VRF strict_mode test on testns network namespace"
-	vrf_strict_mode_tests_testns
+usage()
+{
+	cat <<EOF
+usage: ${0##*/} OPTS
 
-	log_section "VRF strict_mode test mixing init and testns network namespaces"
-	vrf_strict_mode_tests_mix
+	-t <test>	Test(s) to run (default: all)
+			(options: $TESTS)
+EOF
 }
 
+################################################################################
+# main
+
+while getopts ":t:h" opt; do
+	case $opt in
+		t) TESTS=$OPTARG;;
+		h) usage; exit 0;;
+		*) usage; exit 1;;
+	esac
+done
+
 vrf_strict_mode_check_support()
 {
 	local nsname=$1
@@ -371,24 +391,34 @@ vrf_strict_mode_check_support()
 
 if [ "$(id -u)" -ne 0 ];then
 	echo "SKIP: Need root privileges"
-	exit 0
+	exit $ksft_skip
 fi
 
 if [ ! -x "$(command -v ip)" ]; then
 	echo "SKIP: Could not run test without ip tool"
-	exit 0
+	exit $ksft_skip
 fi
 
 modprobe vrf &>/dev/null
 if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
 	echo "SKIP: vrf sysctl does not exist"
-	exit 0
+	exit $ksft_skip
 fi
 
 cleanup &> /dev/null
 
 setup
-vrf_strict_mode_tests
+for t in $TESTS
+do
+	case $t in
+	vrf_strict_mode_tests_init|init) vrf_strict_mode_tests_init;;
+	vrf_strict_mode_tests_testns|testns) vrf_strict_mode_tests_testns;;
+	vrf_strict_mode_tests_mix|mix) vrf_strict_mode_tests_mix;;
+
+	help) echo "Test names: $TESTS"; exit 0;;
+
+	esac
+done
 cleanup
 
 print_log_test_results
diff --git a/tools/testing/selftests/net/xdp_dummy.c b/tools/testing/selftests/net/xdp_dummy.c
new file mode 100644
index 000000000000..d988b2e0cee8
--- /dev/null
+++ b/tools/testing/selftests/net/xdp_dummy.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define KBUILD_MODNAME "xdp_dummy"
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+SEC("xdp")
+int xdp_dummy_prog(struct xdp_md *ctx)
+{
+	return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/net/xfrm_policy.sh b/tools/testing/selftests/net/xfrm_policy.sh
index 7a1bf94c5bd3..457789530645 100755
--- a/tools/testing/selftests/net/xfrm_policy.sh
+++ b/tools/testing/selftests/net/xfrm_policy.sh
@@ -18,8 +18,7 @@
 # ns1: ping 10.0.2.254: does NOT pass via ipsec tunnel (exception)
 # ns2: ping 10.0.1.254: does NOT pass via ipsec tunnel (exception)
 
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
+source lib.sh
 ret=0
 policy_checks_ok=1
 
@@ -202,26 +201,26 @@ check_xfrm() {
 	# 1: iptables -m policy rule count != 0
 	rval=$1
 	ip=$2
-	lret=0
+	local lret=0
 
-	ip netns exec ns1 ping -q -c 1 10.0.2.$ip > /dev/null
+	ip netns exec ${ns[1]} ping -q -c 1 10.0.2.$ip > /dev/null
 
-	check_ipt_policy_count ns3
+	check_ipt_policy_count ${ns[3]}
 	if [ $? -ne $rval ] ; then
 		lret=1
 	fi
-	check_ipt_policy_count ns4
+	check_ipt_policy_count ${ns[4]}
 	if [ $? -ne $rval ] ; then
 		lret=1
 	fi
 
-	ip netns exec ns2 ping -q -c 1 10.0.1.$ip > /dev/null
+	ip netns exec ${ns[2]} ping -q -c 1 10.0.1.$ip > /dev/null
 
-	check_ipt_policy_count ns3
+	check_ipt_policy_count ${ns[3]}
 	if [ $? -ne $rval ] ; then
 		lret=1
 	fi
-	check_ipt_policy_count ns4
+	check_ipt_policy_count ${ns[4]}
 	if [ $? -ne $rval ] ; then
 		lret=1
 	fi
@@ -270,11 +269,11 @@ check_hthresh_repeat()
 	i=0
 
 	for i in $(seq 1 10);do
-		ip -net ns1 xfrm policy update src e000:0001::0000 dst ff01::0014:0000:0001 dir in tmpl src :: dst :: proto esp mode tunnel priority 100 action allow || break
-		ip -net ns1 xfrm policy set hthresh6 0 28 || break
+		ip -net ${ns[1]} xfrm policy update src e000:0001::0000 dst ff01::0014:0000:0001 dir in tmpl src :: dst :: proto esp mode tunnel priority 100 action allow || break
+		ip -net ${ns[1]} xfrm policy set hthresh6 0 28 || break
 
-		ip -net ns1 xfrm policy update src e000:0001::0000 dst ff01::01 dir in tmpl src :: dst :: proto esp mode tunnel priority 100 action allow || break
-		ip -net ns1 xfrm policy set hthresh6 0 28 || break
+		ip -net ${ns[1]} xfrm policy update src e000:0001::0000 dst ff01::01 dir in tmpl src :: dst :: proto esp mode tunnel priority 100 action allow || break
+		ip -net ${ns[1]} xfrm policy set hthresh6 0 28 || break
 	done
 
 	if [ $i -ne 10 ] ;then
@@ -287,6 +286,47 @@ check_hthresh_repeat()
 	return 0
 }
 
+# insert non-overlapping policies in a random order and check that
+# all of them can be fetched using the traffic selectors.
+check_random_order()
+{
+	local ns=$1
+	local log=$2
+
+	for i in $(seq 100); do
+		ip -net $ns xfrm policy flush
+		for j in $(seq 0 16 255 | sort -R); do
+			ip -net $ns xfrm policy add dst $j.0.0.0/24 dir out priority 10 action allow
+		done
+		for j in $(seq 0 16 255); do
+			if ! ip -net $ns xfrm policy get dst $j.0.0.0/24 dir out > /dev/null; then
+				echo "FAIL: $log" 1>&2
+				return 1
+			fi
+		done
+	done
+
+	for i in $(seq 100); do
+		ip -net $ns xfrm policy flush
+		for j in $(seq 0 16 255 | sort -R); do
+			local addr=$(printf "e000:0000:%02x00::/56" $j)
+			ip -net $ns xfrm policy add dst $addr dir out priority 10 action allow
+		done
+		for j in $(seq 0 16 255); do
+			local addr=$(printf "e000:0000:%02x00::/56" $j)
+			if ! ip -net $ns xfrm policy get dst $addr dir out > /dev/null; then
+				echo "FAIL: $log" 1>&2
+				return 1
+			fi
+		done
+	done
+
+	ip -net $ns xfrm policy flush
+
+	echo "PASS: $log"
+	return 0
+}
+
 #check for needed privileges
 if [ "$(id -u)" -ne 0 ];then
 	echo "SKIP: Need root privileges"
@@ -306,79 +346,80 @@ if [ $? -ne 0 ];then
 	exit $ksft_skip
 fi
 
-for i in 1 2 3 4; do
-    ip netns add ns$i
-    ip -net ns$i link set lo up
-done
+setup_ns ns1 ns2 ns3 ns4
+ns[1]=$ns1
+ns[2]=$ns2
+ns[3]=$ns3
+ns[4]=$ns4
 
 DEV=veth0
-ip link add $DEV netns ns1 type veth peer name eth1 netns ns3
-ip link add $DEV netns ns2 type veth peer name eth1 netns ns4
+ip link add $DEV netns ${ns[1]} type veth peer name eth1 netns ${ns[3]}
+ip link add $DEV netns ${ns[2]} type veth peer name eth1 netns ${ns[4]}
 
-ip link add $DEV netns ns3 type veth peer name veth0 netns ns4
+ip link add $DEV netns ${ns[3]} type veth peer name veth0 netns ${ns[4]}
 
 DEV=veth0
 for i in 1 2; do
-    ip -net ns$i link set $DEV up
-    ip -net ns$i addr add 10.0.$i.2/24 dev $DEV
-    ip -net ns$i addr add dead:$i::2/64 dev $DEV
-
-    ip -net ns$i addr add 10.0.$i.253 dev $DEV
-    ip -net ns$i addr add 10.0.$i.254 dev $DEV
-    ip -net ns$i addr add dead:$i::fd dev $DEV
-    ip -net ns$i addr add dead:$i::fe dev $DEV
+    ip -net ${ns[$i]} link set $DEV up
+    ip -net ${ns[$i]} addr add 10.0.$i.2/24 dev $DEV
+    ip -net ${ns[$i]} addr add dead:$i::2/64 dev $DEV
+
+    ip -net ${ns[$i]} addr add 10.0.$i.253 dev $DEV
+    ip -net ${ns[$i]} addr add 10.0.$i.254 dev $DEV
+    ip -net ${ns[$i]} addr add dead:$i::fd dev $DEV
+    ip -net ${ns[$i]} addr add dead:$i::fe dev $DEV
 done
 
 for i in 3 4; do
-ip -net ns$i link set eth1 up
-ip -net ns$i link set veth0 up
+    ip -net ${ns[$i]} link set eth1 up
+    ip -net ${ns[$i]} link set veth0 up
 done
 
-ip -net ns1 route add default via 10.0.1.1
-ip -net ns2 route add default via 10.0.2.1
+ip -net ${ns[1]} route add default via 10.0.1.1
+ip -net ${ns[2]} route add default via 10.0.2.1
 
-ip -net ns3 addr add 10.0.1.1/24 dev eth1
-ip -net ns3 addr add 10.0.3.1/24 dev veth0
-ip -net ns3 addr add 2001:1::1/64 dev eth1
-ip -net ns3 addr add 2001:3::1/64 dev veth0
+ip -net ${ns[3]} addr add 10.0.1.1/24 dev eth1
+ip -net ${ns[3]} addr add 10.0.3.1/24 dev veth0
+ip -net ${ns[3]} addr add 2001:1::1/64 dev eth1
+ip -net ${ns[3]} addr add 2001:3::1/64 dev veth0
 
-ip -net ns3 route add default via 10.0.3.10
+ip -net ${ns[3]} route add default via 10.0.3.10
 
-ip -net ns4 addr add 10.0.2.1/24 dev eth1
-ip -net ns4 addr add 10.0.3.10/24 dev veth0
-ip -net ns4 addr add 2001:2::1/64 dev eth1
-ip -net ns4 addr add 2001:3::10/64 dev veth0
-ip -net ns4 route add default via 10.0.3.1
+ip -net ${ns[4]} addr add 10.0.2.1/24 dev eth1
+ip -net ${ns[4]} addr add 10.0.3.10/24 dev veth0
+ip -net ${ns[4]} addr add 2001:2::1/64 dev eth1
+ip -net ${ns[4]} addr add 2001:3::10/64 dev veth0
+ip -net ${ns[4]} route add default via 10.0.3.1
 
 for j in 4 6; do
 	for i in 3 4;do
-		ip netns exec ns$i sysctl net.ipv$j.conf.eth1.forwarding=1 > /dev/null
-		ip netns exec ns$i sysctl net.ipv$j.conf.veth0.forwarding=1 > /dev/null
+		ip netns exec ${ns[$i]} sysctl net.ipv$j.conf.eth1.forwarding=1 > /dev/null
+		ip netns exec ${ns[$i]} sysctl net.ipv$j.conf.veth0.forwarding=1 > /dev/null
 	done
 done
 
 # abuse iptables rule counter to check if ping matches a policy
-ip netns exec ns3 iptables -p icmp -A FORWARD -m policy --dir out --pol ipsec
-ip netns exec ns4 iptables -p icmp -A FORWARD -m policy --dir out --pol ipsec
+ip netns exec ${ns[3]} iptables -p icmp -A FORWARD -m policy --dir out --pol ipsec
+ip netns exec ${ns[4]} iptables -p icmp -A FORWARD -m policy --dir out --pol ipsec
 if [ $? -ne 0 ];then
 	echo "SKIP: Could not insert iptables rule"
-	for i in 1 2 3 4;do ip netns del ns$i;done
+	cleanup_ns $ns1 $ns2 $ns3 $ns4
 	exit $ksft_skip
 fi
 
 #          localip  remoteip  localnet    remotenet
-do_esp ns3 10.0.3.1 10.0.3.10 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2
-do_esp ns3 dead:3::1 dead:3::10 dead:1::/64 dead:2::/64 $SPI1 $SPI2
-do_esp ns4 10.0.3.10 10.0.3.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1
-do_esp ns4 dead:3::10 dead:3::1 dead:2::/64 dead:1::/64 $SPI2 $SPI1
+do_esp ${ns[3]} 10.0.3.1 10.0.3.10 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2
+do_esp ${ns[3]} dead:3::1 dead:3::10 dead:1::/64 dead:2::/64 $SPI1 $SPI2
+do_esp ${ns[4]} 10.0.3.10 10.0.3.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1
+do_esp ${ns[4]} dead:3::10 dead:3::1 dead:2::/64 dead:1::/64 $SPI2 $SPI1
 
-do_dummies4 ns3
-do_dummies6 ns4
+do_dummies4 ${ns[3]}
+do_dummies6 ${ns[4]}
 
-do_esp_policy_get_check ns3 10.0.1.0/24 10.0.2.0/24
-do_esp_policy_get_check ns4 10.0.2.0/24 10.0.1.0/24
-do_esp_policy_get_check ns3 dead:1::/64 dead:2::/64
-do_esp_policy_get_check ns4 dead:2::/64 dead:1::/64
+do_esp_policy_get_check ${ns[3]} 10.0.1.0/24 10.0.2.0/24
+do_esp_policy_get_check ${ns[4]} 10.0.2.0/24 10.0.1.0/24
+do_esp_policy_get_check ${ns[3]} dead:1::/64 dead:2::/64
+do_esp_policy_get_check ${ns[4]} dead:2::/64 dead:1::/64
 
 # ping to .254 should use ipsec, exception is not installed.
 check_xfrm 1 254
@@ -391,11 +432,11 @@ fi
 
 # installs exceptions
 #                localip  remoteip   encryptdst  plaindst
-do_exception ns3 10.0.3.1 10.0.3.10 10.0.2.253 10.0.2.240/28
-do_exception ns4 10.0.3.10 10.0.3.1 10.0.1.253 10.0.1.240/28
+do_exception ${ns[3]} 10.0.3.1 10.0.3.10 10.0.2.253 10.0.2.240/28
+do_exception ${ns[4]} 10.0.3.10 10.0.3.1 10.0.1.253 10.0.1.240/28
 
-do_exception ns3 dead:3::1 dead:3::10 dead:2::fd  dead:2:f0::/96
-do_exception ns4 dead:3::10 dead:3::1 dead:1::fd  dead:1:f0::/96
+do_exception ${ns[3]} dead:3::1 dead:3::10 dead:2::fd  dead:2:f0::/96
+do_exception ${ns[4]} dead:3::10 dead:3::1 dead:1::fd  dead:1:f0::/96
 
 check_exceptions "exceptions"
 if [ $? -ne 0 ]; then
@@ -403,14 +444,14 @@ if [ $? -ne 0 ]; then
 fi
 
 # insert block policies with adjacent/overlapping netmasks
-do_overlap ns3
+do_overlap ${ns[3]}
 
 check_exceptions "exceptions and block policies"
 if [ $? -ne 0 ]; then
 	ret=1
 fi
 
-for n in ns3 ns4;do
+for n in ${ns[3]} ${ns[4]};do
 	ip -net $n xfrm policy set hthresh4 28 24 hthresh6 126 125
 	sleep $((RANDOM%5))
 done
@@ -418,19 +459,19 @@ done
 check_exceptions "exceptions and block policies after hresh changes"
 
 # full flush of policy db, check everything gets freed incl. internal meta data
-ip -net ns3 xfrm policy flush
+ip -net ${ns[3]} xfrm policy flush
 
-do_esp_policy ns3 10.0.3.1 10.0.3.10 10.0.1.0/24 10.0.2.0/24
-do_exception ns3 10.0.3.1 10.0.3.10 10.0.2.253 10.0.2.240/28
+do_esp_policy ${ns[3]} 10.0.3.1 10.0.3.10 10.0.1.0/24 10.0.2.0/24
+do_exception ${ns[3]} 10.0.3.1 10.0.3.10 10.0.2.253 10.0.2.240/28
 
 # move inexact policies to hash table
-ip -net ns3 xfrm policy set hthresh4 16 16
+ip -net ${ns[3]} xfrm policy set hthresh4 16 16
 
 sleep $((RANDOM%5))
 check_exceptions "exceptions and block policies after hthresh change in ns3"
 
 # restore original hthresh settings -- move policies back to tables
-for n in ns3 ns4;do
+for n in ${ns[3]} ${ns[4]};do
 	ip -net $n xfrm policy set hthresh4 32 32 hthresh6 128 128
 	sleep $((RANDOM%5))
 done
@@ -438,6 +479,8 @@ check_exceptions "exceptions and block policies after htresh change to normal"
 
 check_hthresh_repeat "policies with repeated htresh change"
 
-for i in 1 2 3 4;do ip netns del ns$i;done
+check_random_order ${ns[3]} "policies inserted in random order"
+
+cleanup_ns $ns1 $ns2 $ns3 $ns4
 
 exit $ret
diff --git a/tools/testing/selftests/netfilter/.gitignore b/tools/testing/selftests/netfilter/.gitignore
index 8448f74adfec..c2229b3e40d4 100644
--- a/tools/testing/selftests/netfilter/.gitignore
+++ b/tools/testing/selftests/netfilter/.gitignore
@@ -1,2 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 nf-queue
+connect_close
+audit_logread
+conntrack_dump_flush
+sctp_collision
diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile
index a374e10ef506..936c3085bb83 100644
--- a/tools/testing/selftests/netfilter/Makefile
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -1,12 +1,21 @@
 # SPDX-License-Identifier: GPL-2.0
 # Makefile for netfilter selftests
 
-TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \
+TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \
 	conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \
 	nft_concat_range.sh nft_conntrack_helper.sh \
-	nft_queue.sh nft_meta.sh
+	nft_queue.sh nft_meta.sh nf_nat_edemux.sh \
+	ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \
+	conntrack_vrf.sh nft_synproxy.sh rpath.sh nft_audit.sh \
+	conntrack_sctp_collision.sh xt_string.sh \
+	bridge_netfilter.sh
 
-LDLIBS = -lmnl
-TEST_GEN_FILES =  nf-queue
+HOSTPKG_CONFIG := pkg-config
+
+CFLAGS += $(shell $(HOSTPKG_CONFIG) --cflags libmnl 2>/dev/null)
+LDLIBS += $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl)
+
+TEST_GEN_FILES =  nf-queue connect_close audit_logread sctp_collision \
+	conntrack_dump_flush
 
 include ../lib.mk
diff --git a/tools/testing/selftests/netfilter/audit_logread.c b/tools/testing/selftests/netfilter/audit_logread.c
new file mode 100644
index 000000000000..a0a880fc2d9d
--- /dev/null
+++ b/tools/testing/selftests/netfilter/audit_logread.c
@@ -0,0 +1,165 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <unistd.h>
+#include <linux/audit.h>
+#include <linux/netlink.h>
+
+static int fd;
+
+#define MAX_AUDIT_MESSAGE_LENGTH	8970
+struct audit_message {
+	struct nlmsghdr nlh;
+	union {
+		struct audit_status s;
+		char data[MAX_AUDIT_MESSAGE_LENGTH];
+	} u;
+};
+
+int audit_recv(int fd, struct audit_message *rep)
+{
+	struct sockaddr_nl addr;
+	socklen_t addrlen = sizeof(addr);
+	int ret;
+
+	do {
+		ret = recvfrom(fd, rep, sizeof(*rep), 0,
+			       (struct sockaddr *)&addr, &addrlen);
+	} while (ret < 0 && errno == EINTR);
+
+	if (ret < 0 ||
+	    addrlen != sizeof(addr) ||
+	    addr.nl_pid != 0 ||
+	    rep->nlh.nlmsg_type == NLMSG_ERROR) /* short-cut for now */
+		return -1;
+
+	return ret;
+}
+
+int audit_send(int fd, uint16_t type, uint32_t key, uint32_t val)
+{
+	static int seq = 0;
+	struct audit_message msg = {
+		.nlh = {
+			.nlmsg_len   = NLMSG_SPACE(sizeof(msg.u.s)),
+			.nlmsg_type  = type,
+			.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
+			.nlmsg_seq   = ++seq,
+		},
+		.u.s = {
+			.mask    = key,
+			.enabled = key == AUDIT_STATUS_ENABLED ? val : 0,
+			.pid     = key == AUDIT_STATUS_PID ? val : 0,
+		}
+	};
+	struct sockaddr_nl addr = {
+		.nl_family = AF_NETLINK,
+	};
+	int ret;
+
+	do {
+		ret = sendto(fd, &msg, msg.nlh.nlmsg_len, 0,
+			     (struct sockaddr *)&addr, sizeof(addr));
+	} while (ret < 0 && errno == EINTR);
+
+	if (ret != (int)msg.nlh.nlmsg_len)
+		return -1;
+	return 0;
+}
+
+int audit_set(int fd, uint32_t key, uint32_t val)
+{
+	struct audit_message rep = { 0 };
+	int ret;
+
+	ret = audit_send(fd, AUDIT_SET, key, val);
+	if (ret)
+		return ret;
+
+	ret = audit_recv(fd, &rep);
+	if (ret < 0)
+		return ret;
+	return 0;
+}
+
+int readlog(int fd)
+{
+	struct audit_message rep = { 0 };
+	int ret = audit_recv(fd, &rep);
+	const char *sep = "";
+	char *k, *v;
+
+	if (ret < 0)
+		return ret;
+
+	if (rep.nlh.nlmsg_type != AUDIT_NETFILTER_CFG)
+		return 0;
+
+	/* skip the initial "audit(...): " part */
+	strtok(rep.u.data, " ");
+
+	while ((k = strtok(NULL, "="))) {
+		v = strtok(NULL, " ");
+
+		/* these vary and/or are uninteresting, ignore */
+		if (!strcmp(k, "pid") ||
+		    !strcmp(k, "comm") ||
+		    !strcmp(k, "subj"))
+			continue;
+
+		/* strip the varying sequence number */
+		if (!strcmp(k, "table"))
+			*strchrnul(v, ':') = '\0';
+
+		printf("%s%s=%s", sep, k, v);
+		sep = " ";
+	}
+	if (*sep) {
+		printf("\n");
+		fflush(stdout);
+	}
+	return 0;
+}
+
+void cleanup(int sig)
+{
+	audit_set(fd, AUDIT_STATUS_ENABLED, 0);
+	close(fd);
+	if (sig)
+		exit(0);
+}
+
+int main(int argc, char **argv)
+{
+	struct sigaction act = {
+		.sa_handler = cleanup,
+	};
+
+	fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_AUDIT);
+	if (fd < 0) {
+		perror("Can't open netlink socket");
+		return -1;
+	}
+
+	if (sigaction(SIGTERM, &act, NULL) < 0 ||
+	    sigaction(SIGINT, &act, NULL) < 0) {
+		perror("Can't set signal handler");
+		close(fd);
+		return -1;
+	}
+
+	audit_set(fd, AUDIT_STATUS_ENABLED, 1);
+	audit_set(fd, AUDIT_STATUS_PID, getpid());
+
+	while (1)
+		readlog(fd);
+}
diff --git a/tools/testing/selftests/netfilter/bridge_netfilter.sh b/tools/testing/selftests/netfilter/bridge_netfilter.sh
new file mode 100644
index 000000000000..659b3ab02c8b
--- /dev/null
+++ b/tools/testing/selftests/netfilter/bridge_netfilter.sh
@@ -0,0 +1,188 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test bridge netfilter + conntrack, a combination that doesn't really work,
+# with multicast/broadcast packets racing for hash table insertion.
+
+#           eth0    br0     eth0
+# setup is: ns1 <->,ns0 <-> ns3
+#           ns2 <-'    `'-> ns4
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns0="ns0-$sfx"
+ns1="ns1-$sfx"
+ns2="ns2-$sfx"
+ns3="ns3-$sfx"
+ns4="ns4-$sfx"
+
+ebtables -V > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without ebtables"
+	exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+for i in $(seq 0 4); do
+  eval ip netns add \$ns$i
+done
+
+cleanup() {
+  for i in $(seq 0 4); do eval ip netns del \$ns$i;done
+}
+
+trap cleanup EXIT
+
+do_ping()
+{
+	fromns="$1"
+	dstip="$2"
+
+	ip netns exec $fromns ping -c 1 -q $dstip > /dev/null
+	if [ $? -ne 0 ]; then
+		echo "ERROR: ping from $fromns to $dstip"
+		ip netns exec ${ns0} nft list ruleset
+		ret=1
+	fi
+}
+
+bcast_ping()
+{
+	fromns="$1"
+	dstip="$2"
+
+	for i in $(seq 1 1000); do
+		ip netns exec $fromns ping -q -f -b -c 1 -q $dstip > /dev/null 2>&1
+		if [ $? -ne 0 ]; then
+			echo "ERROR: ping -b from $fromns to $dstip"
+			ip netns exec ${ns0} nft list ruleset
+			fi
+	done
+}
+
+ip link add veth1 netns ${ns0} type veth peer name eth0 netns ${ns1}
+if [ $? -ne 0 ]; then
+	echo "SKIP: Can't create veth device"
+	exit $ksft_skip
+fi
+
+ip link add veth2 netns ${ns0} type veth peer name eth0 netns $ns2
+ip link add veth3 netns ${ns0} type veth peer name eth0 netns $ns3
+ip link add veth4 netns ${ns0} type veth peer name eth0 netns $ns4
+
+ip -net ${ns0} link set lo up
+
+for i in $(seq 1 4); do
+  ip -net ${ns0} link set veth$i up
+done
+
+ip -net ${ns0} link add br0 type bridge stp_state 0 forward_delay 0 nf_call_iptables 1 nf_call_ip6tables 1 nf_call_arptables 1
+if [ $? -ne 0 ]; then
+	echo "SKIP: Can't create bridge br0"
+	exit $ksft_skip
+fi
+
+# make veth0,1,2 part of bridge.
+for i in $(seq 1 3); do
+  ip -net ${ns0} link set veth$i master br0
+done
+
+# add a macvlan on top of the bridge.
+MACVLAN_ADDR=ba:f3:13:37:42:23
+ip -net ${ns0} link add link br0 name macvlan0 type macvlan mode private
+ip -net ${ns0} link set macvlan0 address ${MACVLAN_ADDR}
+ip -net ${ns0} link set macvlan0 up
+ip -net ${ns0} addr add 10.23.0.1/24 dev macvlan0
+
+# add a macvlan on top of veth4.
+MACVLAN_ADDR=ba:f3:13:37:42:24
+ip -net ${ns0} link add link veth4 name macvlan4 type macvlan mode vepa
+ip -net ${ns0} link set macvlan4 address ${MACVLAN_ADDR}
+ip -net ${ns0} link set macvlan4 up
+
+# make the macvlan part of the bridge.
+# veth4 is not a bridge port, only the macvlan on top of it.
+ip -net ${ns0} link set macvlan4 master br0
+
+ip -net ${ns0} link set br0 up
+ip -net ${ns0} addr add 10.0.0.1/24 dev br0
+ip netns exec ${ns0} sysctl -q net.bridge.bridge-nf-call-iptables=1
+ret=$?
+if [ $ret -ne 0 ] ; then
+	echo "SKIP: bridge netfilter not available"
+	ret=$ksft_skip
+fi
+
+# for testing, so namespaces will reply to ping -b probes.
+ip netns exec ${ns0} sysctl -q net.ipv4.icmp_echo_ignore_broadcasts=0
+
+# enable conntrack in ns0 and drop broadcast packets in forward to
+# avoid them from getting confirmed in the postrouting hook before
+# the cloned skb is passed up the stack.
+ip netns exec ${ns0} nft -f - <<EOF
+table ip filter {
+	chain input {
+		type filter hook input priority 1; policy accept
+		iifname br0 counter
+		ct state new accept
+	}
+}
+
+table bridge filter {
+	chain forward {
+		type filter hook forward priority 0; policy accept
+		meta pkttype broadcast ip protocol icmp counter drop
+	}
+}
+EOF
+
+# place 1, 2 & 3 in same subnet, connected via ns0:br0.
+# ns4 is placed in same subnet as well, but its not
+# part of the bridge: the corresponding veth4 is not
+# part of the bridge, only its macvlan interface.
+for i in $(seq 1 4); do
+  eval ip -net \$ns$i link set lo up
+  eval ip -net \$ns$i link set eth0 up
+done
+for i in $(seq 1 2); do
+  eval ip -net \$ns$i addr add 10.0.0.1$i/24 dev eth0
+done
+
+ip -net ${ns3} addr add 10.23.0.13/24 dev eth0
+ip -net ${ns4} addr add 10.23.0.14/24 dev eth0
+
+# test basic connectivity
+do_ping ${ns1} 10.0.0.12
+do_ping ${ns3} 10.23.0.1
+do_ping ${ns4} 10.23.0.1
+
+if [ $ret -eq 0 ];then
+	echo "PASS: netns connectivity: ns1 can reach ns2, ns3 and ns4 can reach ns0"
+fi
+
+bcast_ping ${ns1} 10.0.0.255
+
+# This should deliver broadcast to macvlan0, which is on top of ns0:br0.
+bcast_ping ${ns3} 10.23.0.255
+
+# same, this time via veth4:macvlan4.
+bcast_ping ${ns4} 10.23.0.255
+
+read t < /proc/sys/kernel/tainted
+
+if [ $t -eq 0 ];then
+	echo PASS: kernel not tainted
+else
+	echo ERROR: kernel is tainted
+	ret=1
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/config b/tools/testing/selftests/netfilter/config
index 4faf2ce021d9..7c42b1b2c69b 100644
--- a/tools/testing/selftests/netfilter/config
+++ b/tools/testing/selftests/netfilter/config
@@ -6,3 +6,4 @@ CONFIG_NFT_REDIR=m
 CONFIG_NFT_MASQ=m
 CONFIG_NFT_FLOW_OFFLOAD=m
 CONFIG_NF_CT_NETLINK=m
+CONFIG_AUDIT=y
diff --git a/tools/testing/selftests/netfilter/connect_close.c b/tools/testing/selftests/netfilter/connect_close.c
new file mode 100644
index 000000000000..1c3b0add54c4
--- /dev/null
+++ b/tools/testing/selftests/netfilter/connect_close.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+#include <signal.h>
+
+#include <arpa/inet.h>
+#include <sys/socket.h>
+
+#define PORT 12345
+#define RUNTIME 10
+
+static struct {
+	unsigned int timeout;
+	unsigned int port;
+} opts = {
+	.timeout = RUNTIME,
+	.port = PORT,
+};
+
+static void handler(int sig)
+{
+	_exit(sig == SIGALRM ? 0 : 1);
+}
+
+static void set_timeout(void)
+{
+	struct sigaction action = {
+		.sa_handler = handler,
+	};
+
+	sigaction(SIGALRM, &action, NULL);
+
+	alarm(opts.timeout);
+}
+
+static void do_connect(const struct sockaddr_in *dst)
+{
+	int s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+
+	if (s >= 0)
+		fcntl(s, F_SETFL, O_NONBLOCK);
+
+	connect(s, (struct sockaddr *)dst, sizeof(*dst));
+	close(s);
+}
+
+static void do_accept(const struct sockaddr_in *src)
+{
+	int c, one = 1, s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+
+	if (s < 0)
+		return;
+
+	setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
+	setsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
+
+	bind(s, (struct sockaddr *)src, sizeof(*src));
+
+	listen(s, 16);
+
+	c = accept(s, NULL, NULL);
+	if (c >= 0)
+		close(c);
+
+	close(s);
+}
+
+static int accept_loop(void)
+{
+	struct sockaddr_in src = {
+		.sin_family = AF_INET,
+		.sin_port = htons(opts.port),
+	};
+
+	inet_pton(AF_INET, "127.0.0.1", &src.sin_addr);
+
+	set_timeout();
+
+	for (;;)
+		do_accept(&src);
+
+	return 1;
+}
+
+static int connect_loop(void)
+{
+	struct sockaddr_in dst = {
+		.sin_family = AF_INET,
+		.sin_port = htons(opts.port),
+	};
+
+	inet_pton(AF_INET, "127.0.0.1", &dst.sin_addr);
+
+	set_timeout();
+
+	for (;;)
+		do_connect(&dst);
+
+	return 1;
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	int c;
+
+	while ((c = getopt(argc, argv, "t:p:")) != -1) {
+		switch (c) {
+		case 't':
+			opts.timeout = atoi(optarg);
+			break;
+		case 'p':
+			opts.port = atoi(optarg);
+			break;
+		}
+	}
+}
+
+int main(int argc, char *argv[])
+{
+	pid_t p;
+
+	parse_opts(argc, argv);
+
+	p = fork();
+	if (p < 0)
+		return 111;
+
+	if (p > 0)
+		return accept_loop();
+
+	return connect_loop();
+}
diff --git a/tools/testing/selftests/netfilter/conntrack_dump_flush.c b/tools/testing/selftests/netfilter/conntrack_dump_flush.c
new file mode 100644
index 000000000000..b11ea8ee6719
--- /dev/null
+++ b/tools/testing/selftests/netfilter/conntrack_dump_flush.c
@@ -0,0 +1,471 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <time.h>
+#include <libmnl/libmnl.h>
+#include <netinet/ip.h>
+
+#include <linux/netlink.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+#include <linux/netfilter/nf_conntrack_tcp.h>
+#include "../kselftest_harness.h"
+
+#define TEST_ZONE_ID 123
+#define NF_CT_DEFAULT_ZONE_ID 0
+
+static int reply_counter;
+
+static int build_cta_tuple_v4(struct nlmsghdr *nlh, int type,
+			      uint32_t src_ip, uint32_t dst_ip,
+			      uint16_t src_port, uint16_t dst_port)
+{
+	struct nlattr *nest, *nest_ip, *nest_proto;
+
+	nest = mnl_attr_nest_start(nlh, type);
+	if (!nest)
+		return -1;
+
+	nest_ip = mnl_attr_nest_start(nlh, CTA_TUPLE_IP);
+	if (!nest_ip)
+		return -1;
+	mnl_attr_put_u32(nlh, CTA_IP_V4_SRC, src_ip);
+	mnl_attr_put_u32(nlh, CTA_IP_V4_DST, dst_ip);
+	mnl_attr_nest_end(nlh, nest_ip);
+
+	nest_proto = mnl_attr_nest_start(nlh, CTA_TUPLE_PROTO);
+	if (!nest_proto)
+		return -1;
+	mnl_attr_put_u8(nlh, CTA_PROTO_NUM, 6);
+	mnl_attr_put_u16(nlh, CTA_PROTO_SRC_PORT, htons(src_port));
+	mnl_attr_put_u16(nlh, CTA_PROTO_DST_PORT, htons(dst_port));
+	mnl_attr_nest_end(nlh, nest_proto);
+
+	mnl_attr_nest_end(nlh, nest);
+}
+
+static int build_cta_tuple_v6(struct nlmsghdr *nlh, int type,
+			      struct in6_addr src_ip, struct in6_addr dst_ip,
+			      uint16_t src_port, uint16_t dst_port)
+{
+	struct nlattr *nest, *nest_ip, *nest_proto;
+
+	nest = mnl_attr_nest_start(nlh, type);
+	if (!nest)
+		return -1;
+
+	nest_ip = mnl_attr_nest_start(nlh, CTA_TUPLE_IP);
+	if (!nest_ip)
+		return -1;
+	mnl_attr_put(nlh, CTA_IP_V6_SRC, sizeof(struct in6_addr), &src_ip);
+	mnl_attr_put(nlh, CTA_IP_V6_DST, sizeof(struct in6_addr), &dst_ip);
+	mnl_attr_nest_end(nlh, nest_ip);
+
+	nest_proto = mnl_attr_nest_start(nlh, CTA_TUPLE_PROTO);
+	if (!nest_proto)
+		return -1;
+	mnl_attr_put_u8(nlh, CTA_PROTO_NUM, 6);
+	mnl_attr_put_u16(nlh, CTA_PROTO_SRC_PORT, htons(src_port));
+	mnl_attr_put_u16(nlh, CTA_PROTO_DST_PORT, htons(dst_port));
+	mnl_attr_nest_end(nlh, nest_proto);
+
+	mnl_attr_nest_end(nlh, nest);
+}
+
+static int build_cta_proto(struct nlmsghdr *nlh)
+{
+	struct nlattr *nest, *nest_proto;
+
+	nest = mnl_attr_nest_start(nlh, CTA_PROTOINFO);
+	if (!nest)
+		return -1;
+
+	nest_proto = mnl_attr_nest_start(nlh, CTA_PROTOINFO_TCP);
+	if (!nest_proto)
+		return -1;
+	mnl_attr_put_u8(nlh, CTA_PROTOINFO_TCP_STATE, TCP_CONNTRACK_ESTABLISHED);
+	mnl_attr_put_u16(nlh, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL, 0x0a0a);
+	mnl_attr_put_u16(nlh, CTA_PROTOINFO_TCP_FLAGS_REPLY, 0x0a0a);
+	mnl_attr_nest_end(nlh, nest_proto);
+
+	mnl_attr_nest_end(nlh, nest);
+}
+
+static int conntrack_data_insert(struct mnl_socket *sock, struct nlmsghdr *nlh,
+				 uint16_t zone)
+{
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+	struct nlmsghdr *rplnlh;
+	unsigned int portid;
+	int err, ret;
+
+	portid = mnl_socket_get_portid(sock);
+
+	ret = build_cta_proto(nlh);
+	if (ret < 0) {
+		perror("build_cta_proto");
+		return -1;
+	}
+	mnl_attr_put_u32(nlh, CTA_TIMEOUT, htonl(20000));
+	mnl_attr_put_u16(nlh, CTA_ZONE, htons(zone));
+
+	if (mnl_socket_sendto(sock, nlh, nlh->nlmsg_len) < 0) {
+		perror("mnl_socket_sendto");
+		return -1;
+	}
+
+	ret = mnl_socket_recvfrom(sock, buf, MNL_SOCKET_BUFFER_SIZE);
+	if (ret < 0) {
+		perror("mnl_socket_recvfrom");
+		return ret;
+	}
+
+	ret = mnl_cb_run(buf, ret, nlh->nlmsg_seq, portid, NULL, NULL);
+	if (ret < 0) {
+		if (errno == EEXIST) {
+			/* The entries are probably still there from a previous
+			 * run. So we are good
+			 */
+			return 0;
+		}
+		perror("mnl_cb_run");
+		return ret;
+	}
+
+	return 0;
+}
+
+static int conntrack_data_generate_v4(struct mnl_socket *sock, uint32_t src_ip,
+				      uint32_t dst_ip, uint16_t zone)
+{
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+	struct nlmsghdr *nlh;
+	struct nfgenmsg *nfh;
+	int ret;
+
+	nlh = mnl_nlmsg_put_header(buf);
+	nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_NEW;
+	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE |
+			   NLM_F_ACK | NLM_F_EXCL;
+	nlh->nlmsg_seq = time(NULL);
+
+	nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg));
+	nfh->nfgen_family = AF_INET;
+	nfh->version = NFNETLINK_V0;
+	nfh->res_id = 0;
+
+	ret = build_cta_tuple_v4(nlh, CTA_TUPLE_ORIG, src_ip, dst_ip, 12345, 443);
+	if (ret < 0) {
+		perror("build_cta_tuple_v4");
+		return ret;
+	}
+	ret = build_cta_tuple_v4(nlh, CTA_TUPLE_REPLY, dst_ip, src_ip, 443, 12345);
+	if (ret < 0) {
+		perror("build_cta_tuple_v4");
+		return ret;
+	}
+	return conntrack_data_insert(sock, nlh, zone);
+}
+
+static int conntrack_data_generate_v6(struct mnl_socket *sock,
+				      struct in6_addr src_ip,
+				      struct in6_addr dst_ip,
+				      uint16_t zone)
+{
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+	struct nlmsghdr *nlh;
+	struct nfgenmsg *nfh;
+	int ret;
+
+	nlh = mnl_nlmsg_put_header(buf);
+	nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_NEW;
+	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE |
+			   NLM_F_ACK | NLM_F_EXCL;
+	nlh->nlmsg_seq = time(NULL);
+
+	nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg));
+	nfh->nfgen_family = AF_INET6;
+	nfh->version = NFNETLINK_V0;
+	nfh->res_id = 0;
+
+	ret = build_cta_tuple_v6(nlh, CTA_TUPLE_ORIG, src_ip, dst_ip,
+				 12345, 443);
+	if (ret < 0) {
+		perror("build_cta_tuple_v6");
+		return ret;
+	}
+	ret = build_cta_tuple_v6(nlh, CTA_TUPLE_REPLY, dst_ip, src_ip,
+				 12345, 443);
+	if (ret < 0) {
+		perror("build_cta_tuple_v6");
+		return ret;
+	}
+	return conntrack_data_insert(sock, nlh, zone);
+}
+
+static int count_entries(const struct nlmsghdr *nlh, void *data)
+{
+	reply_counter++;
+}
+
+static int conntracK_count_zone(struct mnl_socket *sock, uint16_t zone)
+{
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+	struct nlmsghdr *nlh, *rplnlh;
+	struct nfgenmsg *nfh;
+	struct nlattr *nest;
+	unsigned int portid;
+	int err, ret;
+
+	portid = mnl_socket_get_portid(sock);
+
+	nlh = mnl_nlmsg_put_header(buf);
+	nlh->nlmsg_type	= (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_GET;
+	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+	nlh->nlmsg_seq = time(NULL);
+
+	nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg));
+	nfh->nfgen_family = AF_UNSPEC;
+	nfh->version = NFNETLINK_V0;
+	nfh->res_id = 0;
+
+	mnl_attr_put_u16(nlh, CTA_ZONE, htons(zone));
+
+	ret = mnl_socket_sendto(sock, nlh, nlh->nlmsg_len);
+	if (ret < 0) {
+		perror("mnl_socket_sendto");
+		return ret;
+	}
+
+	reply_counter = 0;
+	ret = mnl_socket_recvfrom(sock, buf, MNL_SOCKET_BUFFER_SIZE);
+	while (ret > 0) {
+		ret = mnl_cb_run(buf, ret, nlh->nlmsg_seq, portid,
+				 count_entries, NULL);
+		if (ret <= MNL_CB_STOP)
+			break;
+
+		ret = mnl_socket_recvfrom(sock, buf, MNL_SOCKET_BUFFER_SIZE);
+	}
+	if (ret < 0) {
+		perror("mnl_socket_recvfrom");
+		return ret;
+	}
+
+	return reply_counter;
+}
+
+static int conntrack_flush_zone(struct mnl_socket *sock, uint16_t zone)
+{
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+	struct nlmsghdr *nlh, *rplnlh;
+	struct nfgenmsg *nfh;
+	struct nlattr *nest;
+	unsigned int portid;
+	int err, ret;
+
+	portid = mnl_socket_get_portid(sock);
+
+	nlh = mnl_nlmsg_put_header(buf);
+	nlh->nlmsg_type	= (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_DELETE;
+	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+	nlh->nlmsg_seq = time(NULL);
+
+	nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg));
+	nfh->nfgen_family = AF_UNSPEC;
+	nfh->version = NFNETLINK_V0;
+	nfh->res_id = 0;
+
+	mnl_attr_put_u16(nlh, CTA_ZONE, htons(zone));
+
+	ret = mnl_socket_sendto(sock, nlh, nlh->nlmsg_len);
+	if (ret < 0) {
+		perror("mnl_socket_sendto");
+		return ret;
+	}
+
+	ret = mnl_socket_recvfrom(sock, buf, MNL_SOCKET_BUFFER_SIZE);
+	if (ret < 0) {
+		perror("mnl_socket_recvfrom");
+		return ret;
+	}
+
+	ret = mnl_cb_run(buf, ret, nlh->nlmsg_seq, portid, NULL, NULL);
+	if (ret < 0) {
+		perror("mnl_cb_run");
+		return ret;
+	}
+
+	return 0;
+}
+
+FIXTURE(conntrack_dump_flush)
+{
+	struct mnl_socket *sock;
+};
+
+FIXTURE_SETUP(conntrack_dump_flush)
+{
+	struct in6_addr src, dst;
+	int ret;
+
+	self->sock = mnl_socket_open(NETLINK_NETFILTER);
+	if (!self->sock) {
+		perror("mnl_socket_open");
+		exit(EXIT_FAILURE);
+	}
+
+	if (mnl_socket_bind(self->sock, 0, MNL_SOCKET_AUTOPID) < 0) {
+		perror("mnl_socket_bind");
+		exit(EXIT_FAILURE);
+	}
+
+	ret = conntracK_count_zone(self->sock, TEST_ZONE_ID);
+	if (ret < 0 && errno == EPERM)
+		SKIP(return, "Needs to be run as root");
+	else if (ret < 0 && errno == EOPNOTSUPP)
+		SKIP(return, "Kernel does not seem to support conntrack zones");
+
+	ret = conntrack_data_generate_v4(self->sock, 0xf0f0f0f0, 0xf1f1f1f1,
+					 TEST_ZONE_ID);
+	EXPECT_EQ(ret, 0);
+	ret = conntrack_data_generate_v4(self->sock, 0xf2f2f2f2, 0xf3f3f3f3,
+					 TEST_ZONE_ID + 1);
+	EXPECT_EQ(ret, 0);
+	ret = conntrack_data_generate_v4(self->sock, 0xf4f4f4f4, 0xf5f5f5f5,
+					 TEST_ZONE_ID + 2);
+	EXPECT_EQ(ret, 0);
+	ret = conntrack_data_generate_v4(self->sock, 0xf6f6f6f6, 0xf7f7f7f7,
+					 NF_CT_DEFAULT_ZONE_ID);
+	EXPECT_EQ(ret, 0);
+
+	src = (struct in6_addr) {{
+		.__u6_addr32 = {
+			0xb80d0120,
+			0x00000000,
+			0x00000000,
+			0x01000000
+		}
+	}};
+	dst = (struct in6_addr) {{
+		.__u6_addr32 = {
+			0xb80d0120,
+			0x00000000,
+			0x00000000,
+			0x02000000
+		}
+	}};
+	ret = conntrack_data_generate_v6(self->sock, src, dst,
+					 TEST_ZONE_ID);
+	EXPECT_EQ(ret, 0);
+	src = (struct in6_addr) {{
+		.__u6_addr32 = {
+			0xb80d0120,
+			0x00000000,
+			0x00000000,
+			0x03000000
+		}
+	}};
+	dst = (struct in6_addr) {{
+		.__u6_addr32 = {
+			0xb80d0120,
+			0x00000000,
+			0x00000000,
+			0x04000000
+		}
+	}};
+	ret = conntrack_data_generate_v6(self->sock, src, dst,
+					 TEST_ZONE_ID + 1);
+	EXPECT_EQ(ret, 0);
+	src = (struct in6_addr) {{
+		.__u6_addr32 = {
+			0xb80d0120,
+			0x00000000,
+			0x00000000,
+			0x05000000
+		}
+	}};
+	dst = (struct in6_addr) {{
+		.__u6_addr32 = {
+			0xb80d0120,
+			0x00000000,
+			0x00000000,
+			0x06000000
+		}
+	}};
+	ret = conntrack_data_generate_v6(self->sock, src, dst,
+					 TEST_ZONE_ID + 2);
+	EXPECT_EQ(ret, 0);
+
+	src = (struct in6_addr) {{
+		.__u6_addr32 = {
+			0xb80d0120,
+			0x00000000,
+			0x00000000,
+			0x07000000
+		}
+	}};
+	dst = (struct in6_addr) {{
+		.__u6_addr32 = {
+			0xb80d0120,
+			0x00000000,
+			0x00000000,
+			0x08000000
+		}
+	}};
+	ret = conntrack_data_generate_v6(self->sock, src, dst,
+					 NF_CT_DEFAULT_ZONE_ID);
+	EXPECT_EQ(ret, 0);
+
+	ret = conntracK_count_zone(self->sock, TEST_ZONE_ID);
+	EXPECT_GE(ret, 2);
+	if (ret > 2)
+		SKIP(return, "kernel does not support filtering by zone");
+}
+
+FIXTURE_TEARDOWN(conntrack_dump_flush)
+{
+}
+
+TEST_F(conntrack_dump_flush, test_dump_by_zone)
+{
+	int ret;
+
+	ret = conntracK_count_zone(self->sock, TEST_ZONE_ID);
+	EXPECT_EQ(ret, 2);
+}
+
+TEST_F(conntrack_dump_flush, test_flush_by_zone)
+{
+	int ret;
+
+	ret = conntrack_flush_zone(self->sock, TEST_ZONE_ID);
+	EXPECT_EQ(ret, 0);
+	ret = conntracK_count_zone(self->sock, TEST_ZONE_ID);
+	EXPECT_EQ(ret, 0);
+	ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 1);
+	EXPECT_EQ(ret, 2);
+	ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 2);
+	EXPECT_EQ(ret, 2);
+	ret = conntracK_count_zone(self->sock, NF_CT_DEFAULT_ZONE_ID);
+	EXPECT_EQ(ret, 2);
+}
+
+TEST_F(conntrack_dump_flush, test_flush_by_zone_default)
+{
+	int ret;
+
+	ret = conntrack_flush_zone(self->sock, NF_CT_DEFAULT_ZONE_ID);
+	EXPECT_EQ(ret, 0);
+	ret = conntracK_count_zone(self->sock, TEST_ZONE_ID);
+	EXPECT_EQ(ret, 2);
+	ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 1);
+	EXPECT_EQ(ret, 2);
+	ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 2);
+	EXPECT_EQ(ret, 2);
+	ret = conntracK_count_zone(self->sock, NF_CT_DEFAULT_ZONE_ID);
+	EXPECT_EQ(ret, 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/netfilter/conntrack_icmp_related.sh b/tools/testing/selftests/netfilter/conntrack_icmp_related.sh
index b48e1833bc89..76645aaf2b58 100755
--- a/tools/testing/selftests/netfilter/conntrack_icmp_related.sh
+++ b/tools/testing/selftests/netfilter/conntrack_icmp_related.sh
@@ -35,6 +35,8 @@ cleanup() {
 	for i in 1 2;do ip netns del nsrouter$i;done
 }
 
+trap cleanup EXIT
+
 ipv4() {
     echo -n 192.168.$1.2
 }
@@ -146,11 +148,17 @@ ip netns exec nsclient1 nft -f - <<EOF
 table inet filter {
 	counter unknown { }
 	counter related { }
+	counter redir4 { }
+	counter redir6 { }
 	chain input {
 		type filter hook input priority 0; policy accept;
-		meta l4proto { icmp, icmpv6 } ct state established,untracked accept
 
+		icmp type "redirect" ct state "related" counter name "redir4" accept
+		icmpv6 type "nd-redirect" ct state "related" counter name "redir6" accept
+
+		meta l4proto { icmp, icmpv6 } ct state established,untracked accept
 		meta l4proto { icmp, icmpv6 } ct state "related" counter name "related" accept
+
 		counter name "unknown" drop
 	}
 }
@@ -279,5 +287,29 @@ else
 	echo "ERROR: icmp error RELATED state test has failed"
 fi
 
-cleanup
+# add 'bad' route,  expect icmp REDIRECT to be generated
+ip netns exec nsclient1 ip route add 192.168.1.42 via 192.168.1.1
+ip netns exec nsclient1 ip route add dead:1::42 via dead:1::1
+
+ip netns exec "nsclient1" ping -q -c 2 192.168.1.42 > /dev/null
+
+expect="packets 1 bytes 112"
+check_counter nsclient1 "redir4" "$expect"
+if [ $? -ne 0 ];then
+	ret=1
+fi
+
+ip netns exec "nsclient1" ping -c 1 dead:1::42 > /dev/null
+expect="packets 1 bytes 192"
+check_counter nsclient1 "redir6" "$expect"
+if [ $? -ne 0 ];then
+	ret=1
+fi
+
+if [ $ret -eq 0 ];then
+	echo "PASS: icmp redirects had RELATED state"
+else
+	echo "ERROR: icmp redirect RELATED state test has failed"
+fi
+
 exit $ret
diff --git a/tools/testing/selftests/netfilter/conntrack_sctp_collision.sh b/tools/testing/selftests/netfilter/conntrack_sctp_collision.sh
new file mode 100755
index 000000000000..a924e595cfd8
--- /dev/null
+++ b/tools/testing/selftests/netfilter/conntrack_sctp_collision.sh
@@ -0,0 +1,89 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Testing For SCTP COLLISION SCENARIO as Below:
+#
+#   14:35:47.655279 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [INIT] [init tag: 2017837359]
+#   14:35:48.353250 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [INIT] [init tag: 1187206187]
+#   14:35:48.353275 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [INIT ACK] [init tag: 2017837359]
+#   14:35:48.353283 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [COOKIE ECHO]
+#   14:35:48.353977 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [COOKIE ACK]
+#   14:35:48.855335 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [INIT ACK] [init tag: 164579970]
+#
+# TOPO: SERVER_NS (link0)<--->(link1) ROUTER_NS (link2)<--->(link3) CLIENT_NS
+
+CLIENT_NS=$(mktemp -u client-XXXXXXXX)
+CLIENT_IP="198.51.200.1"
+CLIENT_PORT=1234
+
+SERVER_NS=$(mktemp -u server-XXXXXXXX)
+SERVER_IP="198.51.100.1"
+SERVER_PORT=1234
+
+ROUTER_NS=$(mktemp -u router-XXXXXXXX)
+CLIENT_GW="198.51.200.2"
+SERVER_GW="198.51.100.2"
+
+# setup the topo
+setup() {
+	ip net add $CLIENT_NS
+	ip net add $SERVER_NS
+	ip net add $ROUTER_NS
+	ip -n $SERVER_NS link add link0 type veth peer name link1 netns $ROUTER_NS
+	ip -n $CLIENT_NS link add link3 type veth peer name link2 netns $ROUTER_NS
+
+	ip -n $SERVER_NS link set link0 up
+	ip -n $SERVER_NS addr add $SERVER_IP/24 dev link0
+	ip -n $SERVER_NS route add $CLIENT_IP dev link0 via $SERVER_GW
+
+	ip -n $ROUTER_NS link set link1 up
+	ip -n $ROUTER_NS link set link2 up
+	ip -n $ROUTER_NS addr add $SERVER_GW/24 dev link1
+	ip -n $ROUTER_NS addr add $CLIENT_GW/24 dev link2
+	ip net exec $ROUTER_NS sysctl -wq net.ipv4.ip_forward=1
+
+	ip -n $CLIENT_NS link set link3 up
+	ip -n $CLIENT_NS addr add $CLIENT_IP/24 dev link3
+	ip -n $CLIENT_NS route add $SERVER_IP dev link3 via $CLIENT_GW
+
+	# simulate the delay on OVS upcall by setting up a delay for INIT_ACK with
+	# tc on $SERVER_NS side
+	tc -n $SERVER_NS qdisc add dev link0 root handle 1: htb
+	tc -n $SERVER_NS class add dev link0 parent 1: classid 1:1 htb rate 100mbit
+	tc -n $SERVER_NS filter add dev link0 parent 1: protocol ip u32 match ip protocol 132 \
+		0xff match u8 2 0xff at 32 flowid 1:1
+	tc -n $SERVER_NS qdisc add dev link0 parent 1:1 handle 10: netem delay 1200ms
+
+	# simulate the ctstate check on OVS nf_conntrack
+	ip net exec $ROUTER_NS iptables -A FORWARD -m state --state INVALID,UNTRACKED -j DROP
+	ip net exec $ROUTER_NS iptables -A INPUT -p sctp -j DROP
+
+	# use a smaller number for assoc's max_retrans to reproduce the issue
+	modprobe sctp
+	ip net exec $CLIENT_NS sysctl -wq net.sctp.association_max_retrans=3
+}
+
+cleanup() {
+	ip net exec $CLIENT_NS pkill sctp_collision 2>&1 >/dev/null
+	ip net exec $SERVER_NS pkill sctp_collision 2>&1 >/dev/null
+	ip net del "$CLIENT_NS"
+	ip net del "$SERVER_NS"
+	ip net del "$ROUTER_NS"
+}
+
+do_test() {
+	ip net exec $SERVER_NS ./sctp_collision server \
+		$SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT &
+	ip net exec $CLIENT_NS ./sctp_collision client \
+		$CLIENT_IP $CLIENT_PORT $SERVER_IP $SERVER_PORT
+}
+
+# NOTE: one way to work around the issue is set a smaller hb_interval
+# ip net exec $CLIENT_NS sysctl -wq net.sctp.hb_interval=3500
+
+# run the test case
+trap cleanup EXIT
+setup && \
+echo "Test for SCTP Collision in nf_conntrack:" && \
+do_test && echo "PASS!"
+exit $?
diff --git a/tools/testing/selftests/netfilter/conntrack_tcp_unreplied.sh b/tools/testing/selftests/netfilter/conntrack_tcp_unreplied.sh
new file mode 100755
index 000000000000..e7d7bf13cff5
--- /dev/null
+++ b/tools/testing/selftests/netfilter/conntrack_tcp_unreplied.sh
@@ -0,0 +1,167 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Check that UNREPLIED tcp conntrack will eventually timeout.
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+
+waittime=20
+sfx=$(mktemp -u "XXXXXXXX")
+ns1="ns1-$sfx"
+ns2="ns2-$sfx"
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without nft tool"
+	exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+cleanup() {
+	ip netns pids $ns1 | xargs kill 2>/dev/null
+	ip netns pids $ns2 | xargs kill 2>/dev/null
+
+	ip netns del $ns1
+	ip netns del $ns2
+}
+
+ipv4() {
+    echo -n 192.168.$1.2
+}
+
+check_counter()
+{
+	ns=$1
+	name=$2
+	expect=$3
+	local lret=0
+
+	cnt=$(ip netns exec $ns2 nft list counter inet filter "$name" | grep -q "$expect")
+	if [ $? -ne 0 ]; then
+		echo "ERROR: counter $name in $ns2 has unexpected value (expected $expect)" 1>&2
+		ip netns exec $ns2 nft list counter inet filter "$name" 1>&2
+		lret=1
+	fi
+
+	return $lret
+}
+
+# Create test namespaces
+ip netns add $ns1 || exit 1
+
+trap cleanup EXIT
+
+ip netns add $ns2 || exit 1
+
+# Connect the namespace to the host using a veth pair
+ip -net $ns1 link add name veth1 type veth peer name veth2
+ip -net $ns1 link set netns $ns2 dev veth2
+
+ip -net $ns1 link set up dev lo
+ip -net $ns2 link set up dev lo
+ip -net $ns1 link set up dev veth1
+ip -net $ns2 link set up dev veth2
+
+ip -net $ns2 addr add 10.11.11.2/24 dev veth2
+ip -net $ns2 route add default via 10.11.11.1
+
+ip netns exec $ns2 sysctl -q net.ipv4.conf.veth2.forwarding=1
+
+# add a rule inside NS so we enable conntrack
+ip netns exec $ns1 iptables -A INPUT -m state --state established,related -j ACCEPT
+
+ip -net $ns1 addr add 10.11.11.1/24 dev veth1
+ip -net $ns1 route add 10.99.99.99 via 10.11.11.2
+
+# Check connectivity works
+ip netns exec $ns1 ping -q -c 2 10.11.11.2 >/dev/null || exit 1
+
+ip netns exec $ns2 nc -l -p 8080 < /dev/null &
+
+# however, conntrack entries are there
+
+ip netns exec $ns2 nft -f - <<EOF
+table inet filter {
+	counter connreq { }
+	counter redir { }
+	chain input {
+		type filter hook input priority 0; policy accept;
+		ct state new tcp flags syn ip daddr 10.99.99.99 tcp dport 80 counter name "connreq" accept
+		ct state new ct status dnat tcp dport 8080 counter name "redir" accept
+	}
+}
+EOF
+if [ $? -ne 0 ]; then
+	echo "ERROR: Could not load nft rules"
+	exit 1
+fi
+
+ip netns exec $ns2 sysctl -q net.netfilter.nf_conntrack_tcp_timeout_syn_sent=10
+
+echo "INFO: connect $ns1 -> $ns2 to the virtual ip"
+ip netns exec $ns1 bash -c 'while true ; do
+	nc -p 60000 10.99.99.99 80
+	sleep 1
+	done' &
+
+sleep 1
+
+ip netns exec $ns2 nft -f - <<EOF
+table inet nat {
+	chain prerouting {
+		type nat hook prerouting priority 0; policy accept;
+		ip daddr 10.99.99.99 tcp dport 80 redirect to :8080
+	}
+}
+EOF
+if [ $? -ne 0 ]; then
+	echo "ERROR: Could not load nat redirect"
+	exit 1
+fi
+
+count=$(ip netns exec $ns2 conntrack -L -p tcp --dport 80 2>/dev/null | wc -l)
+if [ $count -eq 0 ]; then
+	echo "ERROR: $ns2 did not pick up tcp connection from peer"
+	exit 1
+fi
+
+echo "INFO: NAT redirect added in ns $ns2, waiting for $waittime seconds for nat to take effect"
+for i in $(seq 1 $waittime); do
+	echo -n "."
+
+	sleep 1
+
+	count=$(ip netns exec $ns2 conntrack -L -p tcp --reply-port-src 8080 2>/dev/null | wc -l)
+	if [ $count -gt 0 ]; then
+		echo
+		echo "PASS: redirection took effect after $i seconds"
+		break
+	fi
+
+	m=$((i%20))
+	if [ $m -eq 0 ]; then
+		echo " waited for $i seconds"
+	fi
+done
+
+expect="packets 1 bytes 60"
+check_counter "$ns2" "redir" "$expect"
+if [ $? -ne 0 ]; then
+	ret=1
+fi
+
+if [ $ret -eq 0 ];then
+	echo "PASS: redirection counter has expected values"
+else
+	echo "ERROR: no tcp connection was redirected"
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/conntrack_vrf.sh b/tools/testing/selftests/netfilter/conntrack_vrf.sh
new file mode 100755
index 000000000000..8b5ea9234588
--- /dev/null
+++ b/tools/testing/selftests/netfilter/conntrack_vrf.sh
@@ -0,0 +1,241 @@
+#!/bin/sh
+
+# This script demonstrates interaction of conntrack and vrf.
+# The vrf driver calls the netfilter hooks again, with oif/iif
+# pointing at the VRF device.
+#
+# For ingress, this means first iteration has iifname of lower/real
+# device.  In this script, thats veth0.
+# Second iteration is iifname set to vrf device, tvrf in this script.
+#
+# For egress, this is reversed: first iteration has the vrf device,
+# second iteration is done with the lower/real/veth0 device.
+#
+# test_ct_zone_in demonstrates unexpected change of nftables
+# behavior # caused by commit 09e856d54bda5f28 "vrf: Reset skb conntrack
+# connection on VRF rcv"
+#
+# It was possible to assign conntrack zone to a packet (or mark it for
+# `notracking`) in the prerouting chain before conntrack, based on real iif.
+#
+# After the change, the zone assignment is lost and the zone is assigned based
+# on the VRF master interface (in case such a rule exists).
+# assignment is lost. Instead, assignment based on the `iif` matching
+# Thus it is impossible to distinguish packets based on the original
+# interface.
+#
+# test_masquerade_vrf and test_masquerade_veth0 demonstrate the problem
+# that was supposed to be fixed by the commit mentioned above to make sure
+# that any fix to test case 1 won't break masquerade again.
+
+ksft_skip=4
+
+IP0=172.30.30.1
+IP1=172.30.30.2
+PFXL=30
+ret=0
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns0="ns0-$sfx"
+ns1="ns1-$sfx"
+
+cleanup()
+{
+	ip netns pids $ns0 | xargs kill 2>/dev/null
+	ip netns pids $ns1 | xargs kill 2>/dev/null
+
+	ip netns del $ns0 $ns1
+}
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without nft tool"
+	exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+ip netns add "$ns0"
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not create net namespace $ns0"
+	exit $ksft_skip
+fi
+ip netns add "$ns1"
+
+trap cleanup EXIT
+
+ip netns exec $ns0 sysctl -q -w net.ipv4.conf.default.rp_filter=0
+ip netns exec $ns0 sysctl -q -w net.ipv4.conf.all.rp_filter=0
+ip netns exec $ns0 sysctl -q -w net.ipv4.conf.all.rp_filter=0
+
+ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not add veth device"
+	exit $ksft_skip
+fi
+
+ip -net $ns0 li add tvrf type vrf table 9876
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not add vrf device"
+	exit $ksft_skip
+fi
+
+ip -net $ns0 li set lo up
+
+ip -net $ns0 li set veth0 master tvrf
+ip -net $ns0 li set tvrf up
+ip -net $ns0 li set veth0 up
+ip -net $ns1 li set veth0 up
+
+ip -net $ns0 addr add $IP0/$PFXL dev veth0
+ip -net $ns1 addr add $IP1/$PFXL dev veth0
+
+ip netns exec $ns1 iperf3 -s > /dev/null 2>&1&
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not start iperf3"
+	exit $ksft_skip
+fi
+
+# test vrf ingress handling.
+# The incoming connection should be placed in conntrack zone 1,
+# as decided by the first iteration of the ruleset.
+test_ct_zone_in()
+{
+ip netns exec $ns0 nft -f - <<EOF
+table testct {
+	chain rawpre {
+		type filter hook prerouting priority raw;
+
+		iif { veth0, tvrf } counter meta nftrace set 1
+		iif veth0 counter ct zone set 1 counter return
+		iif tvrf counter ct zone set 2 counter return
+		ip protocol icmp counter
+		notrack counter
+	}
+
+	chain rawout {
+		type filter hook output priority raw;
+
+		oif veth0 counter ct zone set 1 counter return
+		oif tvrf counter ct zone set 2 counter return
+		notrack counter
+	}
+}
+EOF
+	ip netns exec $ns1 ping -W 1 -c 1 -I veth0 $IP0 > /dev/null
+
+	# should be in zone 1, not zone 2
+	count=$(ip netns exec $ns0 conntrack -L -s $IP1 -d $IP0 -p icmp --zone 1 2>/dev/null | wc -l)
+	if [ $count -eq 1 ]; then
+		echo "PASS: entry found in conntrack zone 1"
+	else
+		echo "FAIL: entry not found in conntrack zone 1"
+		count=$(ip netns exec $ns0 conntrack -L -s $IP1 -d $IP0 -p icmp --zone 2 2> /dev/null | wc -l)
+		if [ $count -eq 1 ]; then
+			echo "FAIL: entry found in zone 2 instead"
+		else
+			echo "FAIL: entry not in zone 1 or 2, dumping table"
+			ip netns exec $ns0 conntrack -L
+			ip netns exec $ns0 nft list ruleset
+		fi
+	fi
+}
+
+# add masq rule that gets evaluated w. outif set to vrf device.
+# This tests the first iteration of the packet through conntrack,
+# oifname is the vrf device.
+test_masquerade_vrf()
+{
+	local qdisc=$1
+
+	if [ "$qdisc" != "default" ]; then
+		tc -net $ns0 qdisc add dev tvrf root $qdisc
+	fi
+
+	ip netns exec $ns0 conntrack -F 2>/dev/null
+
+ip netns exec $ns0 nft -f - <<EOF
+flush ruleset
+table ip nat {
+	chain rawout {
+		type filter hook output priority raw;
+
+		oif tvrf ct state untracked counter
+	}
+	chain postrouting2 {
+		type filter hook postrouting priority mangle;
+
+		oif tvrf ct state untracked counter
+	}
+	chain postrouting {
+		type nat hook postrouting priority 0;
+		# NB: masquerade should always be combined with 'oif(name) bla',
+		# lack of this is intentional here, we want to exercise double-snat.
+		ip saddr 172.30.30.0/30 counter masquerade random
+	}
+}
+EOF
+	ip netns exec $ns0 ip vrf exec tvrf iperf3 -t 1 -c $IP1 >/dev/null
+	if [ $? -ne 0 ]; then
+		echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on vrf device"
+		ret=1
+		return
+	fi
+
+	# must also check that nat table was evaluated on second (lower device) iteration.
+	ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2' &&
+	ip netns exec $ns0 nft list table ip nat |grep -q 'untracked counter packets [1-9]'
+	if [ $? -eq 0 ]; then
+		echo "PASS: iperf3 connect with masquerade + sport rewrite on vrf device ($qdisc qdisc)"
+	else
+		echo "FAIL: vrf rules have unexpected counter value"
+		ret=1
+	fi
+
+	if [ "$qdisc" != "default" ]; then
+		tc -net $ns0 qdisc del dev tvrf root
+	fi
+}
+
+# add masq rule that gets evaluated w. outif set to veth device.
+# This tests the 2nd iteration of the packet through conntrack,
+# oifname is the lower device (veth0 in this case).
+test_masquerade_veth()
+{
+	ip netns exec $ns0 conntrack -F 2>/dev/null
+ip netns exec $ns0 nft -f - <<EOF
+flush ruleset
+table ip nat {
+	chain postrouting {
+		type nat hook postrouting priority 0;
+		meta oif veth0 ip saddr 172.30.30.0/30 counter masquerade random
+	}
+}
+EOF
+	ip netns exec $ns0 ip vrf exec tvrf iperf3 -t 1 -c $IP1 > /dev/null
+	if [ $? -ne 0 ]; then
+		echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on veth device"
+		ret=1
+		return
+	fi
+
+	# must also check that nat table was evaluated on second (lower device) iteration.
+	ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2'
+	if [ $? -eq 0 ]; then
+		echo "PASS: iperf3 connect with masquerade + sport rewrite on veth device"
+	else
+		echo "FAIL: vrf masq rule has unexpected counter value"
+		ret=1
+	fi
+}
+
+test_ct_zone_in
+test_masquerade_vrf "default"
+test_masquerade_vrf "pfifo"
+test_masquerade_veth
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh b/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh
new file mode 100755
index 000000000000..eb9553e4986b
--- /dev/null
+++ b/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh
@@ -0,0 +1,207 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# Conntrack needs to reassemble fragments in order to have complete
+# packets for rule matching.  Reassembly can lead to packet loss.
+
+# Consider the following setup:
+#            +--------+       +---------+       +--------+
+#            |Router A|-------|Wanrouter|-------|Router B|
+#            |        |.IPIP..|         |..IPIP.|        |
+#            +--------+       +---------+       +--------+
+#           /                  mtu 1400                   \
+#          /                                               \
+#+--------+                                                 +--------+
+#|Client A|                                                 |Client B|
+#|        |                                                 |        |
+#+--------+                                                 +--------+
+
+# Router A and Router B use IPIP tunnel interfaces to tunnel traffic
+# between Client A and Client B over WAN. Wanrouter has MTU 1400 set
+# on its interfaces.
+
+rnd=$(mktemp -u XXXXXXXX)
+rx=$(mktemp)
+
+r_a="ns-ra-$rnd"
+r_b="ns-rb-$rnd"
+r_w="ns-rw-$rnd"
+c_a="ns-ca-$rnd"
+c_b="ns-cb-$rnd"
+
+checktool (){
+	if ! $1 > /dev/null 2>&1; then
+		echo "SKIP: Could not $2"
+		exit $ksft_skip
+	fi
+}
+
+checktool "iptables --version" "run test without iptables"
+checktool "ip -Version" "run test without ip tool"
+checktool "which socat" "run test without socat"
+checktool "ip netns add ${r_a}" "create net namespace"
+
+for n in ${r_b} ${r_w} ${c_a} ${c_b};do
+	ip netns add ${n}
+done
+
+cleanup() {
+	for n in ${r_a} ${r_b} ${r_w} ${c_a} ${c_b};do
+		ip netns del ${n}
+	done
+	rm -f ${rx}
+}
+
+trap cleanup EXIT
+
+test_path() {
+	msg="$1"
+
+	ip netns exec ${c_b} socat -t 3 - udp4-listen:5000,reuseaddr > ${rx} < /dev/null &
+
+	sleep 1
+	for i in 1 2 3; do
+		head -c1400 /dev/zero | tr "\000" "a" | \
+			ip netns exec ${c_a} socat -t 1 -u STDIN UDP:192.168.20.2:5000
+	done
+
+	wait
+
+	bytes=$(wc -c < ${rx})
+
+	if [ $bytes -eq 1400 ];then
+		echo "OK: PMTU $msg connection tracking"
+	else
+		echo "FAIL: PMTU $msg connection tracking: got $bytes, expected 1400"
+		exit 1
+	fi
+}
+
+# Detailed setup for Router A
+# ---------------------------
+# Interfaces:
+# eth0: 10.2.2.1/24
+# eth1: 192.168.10.1/24
+# ipip0: No IP address, local 10.2.2.1 remote 10.4.4.1
+# Routes:
+# 192.168.20.0/24 dev ipip0    (192.168.20.0/24 is subnet of Client B)
+# 10.4.4.1 via 10.2.2.254      (Router B via Wanrouter)
+# No iptables rules at all.
+
+ip link add veth0 netns ${r_a} type veth peer name veth0 netns ${r_w}
+ip link add veth1 netns ${r_a} type veth peer name veth0 netns ${c_a}
+
+l_addr="10.2.2.1"
+r_addr="10.4.4.1"
+ip netns exec ${r_a} ip link add ipip0 type ipip local ${l_addr} remote ${r_addr} mode ipip || exit $ksft_skip
+
+for dev in lo veth0 veth1 ipip0; do
+    ip -net ${r_a} link set $dev up
+done
+
+ip -net ${r_a} addr add 10.2.2.1/24 dev veth0
+ip -net ${r_a} addr add 192.168.10.1/24 dev veth1
+
+ip -net ${r_a} route add 192.168.20.0/24 dev ipip0
+ip -net ${r_a} route add 10.4.4.0/24 via 10.2.2.254
+
+ip netns exec ${r_a} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
+
+# Detailed setup for Router B
+# ---------------------------
+# Interfaces:
+# eth0: 10.4.4.1/24
+# eth1: 192.168.20.1/24
+# ipip0: No IP address, local 10.4.4.1 remote 10.2.2.1
+# Routes:
+# 192.168.10.0/24 dev ipip0    (192.168.10.0/24 is subnet of Client A)
+# 10.2.2.1 via 10.4.4.254      (Router A via Wanrouter)
+# No iptables rules at all.
+
+ip link add veth0 netns ${r_b} type veth peer name veth1 netns ${r_w}
+ip link add veth1 netns ${r_b} type veth peer name veth0 netns ${c_b}
+
+l_addr="10.4.4.1"
+r_addr="10.2.2.1"
+
+ip netns exec ${r_b} ip link add ipip0 type ipip local ${l_addr} remote ${r_addr} mode ipip || exit $ksft_skip
+
+for dev in lo veth0 veth1 ipip0; do
+	ip -net ${r_b} link set $dev up
+done
+
+ip -net ${r_b} addr add 10.4.4.1/24 dev veth0
+ip -net ${r_b} addr add 192.168.20.1/24 dev veth1
+
+ip -net ${r_b} route add 192.168.10.0/24 dev ipip0
+ip -net ${r_b} route add 10.2.2.0/24 via 10.4.4.254
+ip netns exec ${r_b} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
+
+# Client A
+ip -net ${c_a} addr add 192.168.10.2/24 dev veth0
+ip -net ${c_a} link set dev lo up
+ip -net ${c_a} link set dev veth0 up
+ip -net ${c_a} route add default via 192.168.10.1
+
+# Client A
+ip -net ${c_b} addr add 192.168.20.2/24 dev veth0
+ip -net ${c_b} link set dev veth0 up
+ip -net ${c_b} link set dev lo up
+ip -net ${c_b} route add default via 192.168.20.1
+
+# Wan
+ip -net ${r_w} addr add 10.2.2.254/24 dev veth0
+ip -net ${r_w} addr add 10.4.4.254/24 dev veth1
+
+ip -net ${r_w} link set dev lo up
+ip -net ${r_w} link set dev veth0 up mtu 1400
+ip -net ${r_w} link set dev veth1 up mtu 1400
+
+ip -net ${r_a} link set dev veth0 mtu 1400
+ip -net ${r_b} link set dev veth0 mtu 1400
+
+ip netns exec ${r_w} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
+
+# Path MTU discovery
+# ------------------
+# Running tracepath from Client A to Client B shows PMTU discovery is working
+# as expected:
+#
+# clienta:~# tracepath 192.168.20.2
+# 1?: [LOCALHOST]                      pmtu 1500
+# 1:  192.168.10.1                                          0.867ms
+# 1:  192.168.10.1                                          0.302ms
+# 2:  192.168.10.1                                          0.312ms pmtu 1480
+# 2:  no reply
+# 3:  192.168.10.1                                          0.510ms pmtu 1380
+# 3:  192.168.20.2                                          2.320ms reached
+# Resume: pmtu 1380 hops 3 back 3
+
+# ip netns exec ${c_a} traceroute --mtu 192.168.20.2
+
+# Router A has learned PMTU (1400) to Router B from Wanrouter.
+# Client A has learned PMTU (1400 - IPIP overhead = 1380) to Client B
+# from Router A.
+
+#Send large UDP packet
+#---------------------
+#Now we send a 1400 bytes UDP packet from Client A to Client B:
+
+# clienta:~# head -c1400 /dev/zero | tr "\000" "a" | socat -u STDIN UDP:192.168.20.2:5000
+test_path "without"
+
+# The IPv4 stack on Client A already knows the PMTU to Client B, so the
+# UDP packet is sent as two fragments (1380 + 20). Router A forwards the
+# fragments between eth1 and ipip0. The fragments fit into the tunnel and
+# reach their destination.
+
+#When sending the large UDP packet again, Router A now reassembles the
+#fragments before routing the packet over ipip0. The resulting IPIP
+#packet is too big (1400) for the tunnel PMTU (1380) to Router B, it is
+#dropped on Router A before sending.
+
+ip netns exec ${r_a} iptables -A FORWARD -m conntrack --ctstate NEW
+test_path "with"
diff --git a/tools/testing/selftests/netfilter/nf_nat_edemux.sh b/tools/testing/selftests/netfilter/nf_nat_edemux.sh
new file mode 100755
index 000000000000..a1aa8f4a5828
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nf_nat_edemux.sh
@@ -0,0 +1,127 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test NAT source port clash resolution
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns1="ns1-$sfx"
+ns2="ns2-$sfx"
+socatpid=0
+
+cleanup()
+{
+	[ $socatpid -gt 0 ] && kill $socatpid
+	ip netns del $ns1
+	ip netns del $ns2
+}
+
+socat -h > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without socat"
+	exit $ksft_skip
+fi
+
+iptables --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without iptables"
+	exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+ip netns add "$ns1"
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not create net namespace $ns1"
+	exit $ksft_skip
+fi
+
+trap cleanup EXIT
+
+ip netns add $ns2
+
+# Connect the namespaces using a veth pair
+ip link add name veth2 type veth peer name veth1
+ip link set netns $ns1 dev veth1
+ip link set netns $ns2 dev veth2
+
+ip netns exec $ns1 ip link set up dev lo
+ip netns exec $ns1 ip link set up dev veth1
+ip netns exec $ns1 ip addr add 192.168.1.1/24 dev veth1
+
+ip netns exec $ns2 ip link set up dev lo
+ip netns exec $ns2 ip link set up dev veth2
+ip netns exec $ns2 ip addr add 192.168.1.2/24 dev veth2
+
+# Create a server in one namespace
+ip netns exec $ns1 socat -u TCP-LISTEN:5201,fork OPEN:/dev/null,wronly=1 &
+socatpid=$!
+
+# Restrict source port to just one so we don't have to exhaust
+# all others.
+ip netns exec $ns2 sysctl -q net.ipv4.ip_local_port_range="10000 10000"
+
+# add a virtual IP using DNAT
+ip netns exec $ns2 iptables -t nat -A OUTPUT -d 10.96.0.1/32 -p tcp --dport 443 -j DNAT --to-destination 192.168.1.1:5201
+
+# ... and route it to the other namespace
+ip netns exec $ns2 ip route add 10.96.0.1 via 192.168.1.1
+
+sleep 1
+
+# add a persistent connection from the other namespace
+ip netns exec $ns2 socat -t 10 - TCP:192.168.1.1:5201 > /dev/null &
+
+sleep 1
+
+# ip daddr:dport will be rewritten to 192.168.1.1 5201
+# NAT must reallocate source port 10000 because
+# 192.168.1.2:10000 -> 192.168.1.1:5201 is already in use
+echo test | ip netns exec $ns2 socat -t 3 -u STDIN TCP:10.96.0.1:443,connect-timeout=3 >/dev/null
+ret=$?
+
+# Check socat can connect to 10.96.0.1:443 (aka 192.168.1.1:5201).
+if [ $ret -eq 0 ]; then
+	echo "PASS: socat can connect via NAT'd address"
+else
+	echo "FAIL: socat cannot connect via NAT'd address"
+fi
+
+# check sport clashres.
+ip netns exec $ns1 iptables -t nat -A PREROUTING -p tcp --dport 5202 -j REDIRECT --to-ports 5201
+ip netns exec $ns1 iptables -t nat -A PREROUTING -p tcp --dport 5203 -j REDIRECT --to-ports 5201
+
+sleep 5 | ip netns exec $ns2 socat -t 5 -u STDIN TCP:192.168.1.1:5202,connect-timeout=5 >/dev/null &
+cpid1=$!
+sleep 1
+
+# if connect succeeds, client closes instantly due to EOF on stdin.
+# if connect hangs, it will time out after 5s.
+echo | ip netns exec $ns2 socat -t 3 -u STDIN TCP:192.168.1.1:5203,connect-timeout=5 >/dev/null &
+cpid2=$!
+
+time_then=$(date +%s)
+wait $cpid2
+rv=$?
+time_now=$(date +%s)
+
+# Check how much time has elapsed, expectation is for
+# 'cpid2' to connect and then exit (and no connect delay).
+delta=$((time_now - time_then))
+
+if [ $delta -lt 2 -a $rv -eq 0 ]; then
+	echo "PASS: could connect to service via redirected ports"
+else
+	echo "FAIL: socat cannot connect to service via redirect ($delta seconds elapsed, returned $rv)"
+	ret=1
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_audit.sh b/tools/testing/selftests/netfilter/nft_audit.sh
new file mode 100755
index 000000000000..99ed5bd6e840
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_audit.sh
@@ -0,0 +1,245 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Check that audit logs generated for nft commands are as expected.
+
+SKIP_RC=4
+RC=0
+
+nft --version >/dev/null 2>&1 || {
+	echo "SKIP: missing nft tool"
+	exit $SKIP_RC
+}
+
+# Run everything in a separate network namespace
+[ "${1}" != "run" ] && { unshare -n "${0}" run; exit $?; }
+
+# give other scripts a chance to finish - audit_logread sees all activity
+sleep 1
+
+logfile=$(mktemp)
+rulefile=$(mktemp)
+echo "logging into $logfile"
+./audit_logread >"$logfile" &
+logread_pid=$!
+trap 'kill $logread_pid; rm -f $logfile $rulefile' EXIT
+exec 3<"$logfile"
+
+do_test() { # (cmd, log)
+	echo -n "testing for cmd: $1 ... "
+	cat <&3 >/dev/null
+	$1 >/dev/null || exit 1
+	sleep 0.1
+	res=$(diff -a -u <(echo "$2") - <&3)
+	[ $? -eq 0 ] && { echo "OK"; return; }
+	echo "FAIL"
+	grep -v '^\(---\|+++\|@@\)' <<< "$res"
+	((RC--))
+}
+
+nft flush ruleset
+
+# adding tables, chains and rules
+
+for table in t1 t2; do
+	do_test "nft add table $table" \
+	"table=$table family=2 entries=1 op=nft_register_table"
+
+	do_test "nft add chain $table c1" \
+	"table=$table family=2 entries=1 op=nft_register_chain"
+
+	do_test "nft add chain $table c2; add chain $table c3" \
+	"table=$table family=2 entries=2 op=nft_register_chain"
+
+	cmd="add rule $table c1 counter"
+
+	do_test "nft $cmd" \
+	"table=$table family=2 entries=1 op=nft_register_rule"
+
+	do_test "nft $cmd; $cmd" \
+	"table=$table family=2 entries=2 op=nft_register_rule"
+
+	cmd=""
+	sep=""
+	for chain in c2 c3; do
+		for i in {1..3}; do
+			cmd+="$sep add rule $table $chain counter"
+			sep=";"
+		done
+	done
+	do_test "nft $cmd" \
+	"table=$table family=2 entries=6 op=nft_register_rule"
+done
+
+for ((i = 0; i < 500; i++)); do
+	echo "add rule t2 c3 counter accept comment \"rule $i\""
+done >$rulefile
+do_test "nft -f $rulefile" \
+'table=t2 family=2 entries=500 op=nft_register_rule'
+
+# adding sets and elements
+
+settype='type inet_service; counter'
+setelem='{ 22, 80, 443 }'
+setblock="{ $settype; elements = $setelem; }"
+do_test "nft add set t1 s $setblock" \
+"table=t1 family=2 entries=4 op=nft_register_set"
+
+do_test "nft add set t1 s2 $setblock; add set t1 s3 { $settype; }" \
+"table=t1 family=2 entries=5 op=nft_register_set"
+
+do_test "nft add element t1 s3 $setelem" \
+"table=t1 family=2 entries=3 op=nft_register_setelem"
+
+# adding counters
+
+do_test 'nft add counter t1 c1' \
+'table=t1 family=2 entries=1 op=nft_register_obj'
+
+do_test 'nft add counter t2 c1; add counter t2 c2' \
+'table=t2 family=2 entries=2 op=nft_register_obj'
+
+for ((i = 3; i <= 500; i++)); do
+	echo "add counter t2 c$i"
+done >$rulefile
+do_test "nft -f $rulefile" \
+'table=t2 family=2 entries=498 op=nft_register_obj'
+
+# adding/updating quotas
+
+do_test 'nft add quota t1 q1 { 10 bytes }' \
+'table=t1 family=2 entries=1 op=nft_register_obj'
+
+do_test 'nft add quota t2 q1 { 10 bytes }; add quota t2 q2 { 10 bytes }' \
+'table=t2 family=2 entries=2 op=nft_register_obj'
+
+for ((i = 3; i <= 500; i++)); do
+	echo "add quota t2 q$i { 10 bytes }"
+done >$rulefile
+do_test "nft -f $rulefile" \
+'table=t2 family=2 entries=498 op=nft_register_obj'
+
+# changing the quota value triggers obj update path
+do_test 'nft add quota t1 q1 { 20 bytes }' \
+'table=t1 family=2 entries=1 op=nft_register_obj'
+
+# resetting rules
+
+do_test 'nft reset rules t1 c2' \
+'table=t1 family=2 entries=3 op=nft_reset_rule'
+
+do_test 'nft reset rules table t1' \
+'table=t1 family=2 entries=3 op=nft_reset_rule
+table=t1 family=2 entries=3 op=nft_reset_rule
+table=t1 family=2 entries=3 op=nft_reset_rule'
+
+do_test 'nft reset rules t2 c3' \
+'table=t2 family=2 entries=189 op=nft_reset_rule
+table=t2 family=2 entries=188 op=nft_reset_rule
+table=t2 family=2 entries=126 op=nft_reset_rule'
+
+do_test 'nft reset rules t2' \
+'table=t2 family=2 entries=3 op=nft_reset_rule
+table=t2 family=2 entries=3 op=nft_reset_rule
+table=t2 family=2 entries=186 op=nft_reset_rule
+table=t2 family=2 entries=188 op=nft_reset_rule
+table=t2 family=2 entries=129 op=nft_reset_rule'
+
+do_test 'nft reset rules' \
+'table=t1 family=2 entries=3 op=nft_reset_rule
+table=t1 family=2 entries=3 op=nft_reset_rule
+table=t1 family=2 entries=3 op=nft_reset_rule
+table=t2 family=2 entries=3 op=nft_reset_rule
+table=t2 family=2 entries=3 op=nft_reset_rule
+table=t2 family=2 entries=180 op=nft_reset_rule
+table=t2 family=2 entries=188 op=nft_reset_rule
+table=t2 family=2 entries=135 op=nft_reset_rule'
+
+# resetting sets and elements
+
+elem=(22 ,80 ,443)
+relem=""
+for i in {1..3}; do
+	relem+="${elem[((i - 1))]}"
+	do_test "nft reset element t1 s { $relem }" \
+	"table=t1 family=2 entries=$i op=nft_reset_setelem"
+done
+
+do_test 'nft reset set t1 s' \
+'table=t1 family=2 entries=3 op=nft_reset_setelem'
+
+# resetting counters
+
+do_test 'nft reset counter t1 c1' \
+'table=t1 family=2 entries=1 op=nft_reset_obj'
+
+do_test 'nft reset counters t1' \
+'table=t1 family=2 entries=1 op=nft_reset_obj'
+
+do_test 'nft reset counters t2' \
+'table=t2 family=2 entries=342 op=nft_reset_obj
+table=t2 family=2 entries=158 op=nft_reset_obj'
+
+do_test 'nft reset counters' \
+'table=t1 family=2 entries=1 op=nft_reset_obj
+table=t2 family=2 entries=341 op=nft_reset_obj
+table=t2 family=2 entries=159 op=nft_reset_obj'
+
+# resetting quotas
+
+do_test 'nft reset quota t1 q1' \
+'table=t1 family=2 entries=1 op=nft_reset_obj'
+
+do_test 'nft reset quotas t1' \
+'table=t1 family=2 entries=1 op=nft_reset_obj'
+
+do_test 'nft reset quotas t2' \
+'table=t2 family=2 entries=315 op=nft_reset_obj
+table=t2 family=2 entries=185 op=nft_reset_obj'
+
+do_test 'nft reset quotas' \
+'table=t1 family=2 entries=1 op=nft_reset_obj
+table=t2 family=2 entries=314 op=nft_reset_obj
+table=t2 family=2 entries=186 op=nft_reset_obj'
+
+# deleting rules
+
+readarray -t handles < <(nft -a list chain t1 c1 | \
+			 sed -n 's/.*counter.* handle \(.*\)$/\1/p')
+
+do_test "nft delete rule t1 c1 handle ${handles[0]}" \
+'table=t1 family=2 entries=1 op=nft_unregister_rule'
+
+cmd='delete rule t1 c1 handle'
+do_test "nft $cmd ${handles[1]}; $cmd ${handles[2]}" \
+'table=t1 family=2 entries=2 op=nft_unregister_rule'
+
+do_test 'nft flush chain t1 c2' \
+'table=t1 family=2 entries=3 op=nft_unregister_rule'
+
+do_test 'nft flush table t2' \
+'table=t2 family=2 entries=509 op=nft_unregister_rule'
+
+# deleting chains
+
+do_test 'nft delete chain t2 c2' \
+'table=t2 family=2 entries=1 op=nft_unregister_chain'
+
+# deleting sets and elements
+
+do_test 'nft delete element t1 s { 22 }' \
+'table=t1 family=2 entries=1 op=nft_unregister_setelem'
+
+do_test 'nft delete element t1 s { 80, 443 }' \
+'table=t1 family=2 entries=2 op=nft_unregister_setelem'
+
+do_test 'nft flush set t1 s2' \
+'table=t1 family=2 entries=3 op=nft_unregister_setelem'
+
+do_test 'nft delete set t1 s2' \
+'table=t1 family=2 entries=1 op=nft_unregister_set'
+
+do_test 'nft delete set t1 s3' \
+'table=t1 family=2 entries=1 op=nft_unregister_set'
+
+exit $RC
diff --git a/tools/testing/selftests/netfilter/nft_concat_range.sh b/tools/testing/selftests/netfilter/nft_concat_range.sh
index 5a4938d6dcf2..e908009576c7 100755
--- a/tools/testing/selftests/netfilter/nft_concat_range.sh
+++ b/tools/testing/selftests/netfilter/nft_concat_range.sh
@@ -23,15 +23,15 @@ TESTS="reported_issues correctness concurrency timeout"
 
 # Set types, defined by TYPE_ variables below
 TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto
-       net_port_net net_mac net_mac_icmp net6_mac_icmp net6_port_net6_port
-       net_port_mac_proto_net"
+       net_port_net net_mac mac_net net_mac_icmp net6_mac_icmp
+       net6_port_net6_port net_port_mac_proto_net"
 
 # Reported bugs, also described by TYPE_ variables below
-BUGS="flush_remove_add"
+BUGS="flush_remove_add reload"
 
 # List of possible paths to pktgen script from kernel tree for performance tests
 PKTGEN_SCRIPT_PATHS="
-	../../../samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
+	../../../../samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
 	pktgen/pktgen_bench_xmit_mode_netif_receive.sh"
 
 # Definition of set types:
@@ -91,7 +91,7 @@ src
 start		1
 count		5
 src_delta	2000
-tools		sendip nc bash
+tools		sendip socat nc bash
 proto		udp
 
 race_repeat	3
@@ -116,7 +116,7 @@ src
 start		10
 count		5
 src_delta	2000
-tools		sendip nc bash
+tools		sendip socat nc bash
 proto		udp6
 
 race_repeat	3
@@ -141,7 +141,7 @@ src
 start		1
 count		5
 src_delta	2000
-tools		sendip nc bash
+tools		sendip socat nc bash
 proto		udp
 
 race_repeat	0
@@ -163,7 +163,7 @@ src		mac
 start		10
 count		5
 src_delta	2000
-tools		sendip nc bash
+tools		sendip socat nc bash
 proto		udp6
 
 race_repeat	0
@@ -185,7 +185,7 @@ src		mac proto
 start		10
 count		5
 src_delta	2000
-tools		sendip nc bash
+tools		sendip socat nc bash
 proto		udp6
 
 race_repeat	0
@@ -207,7 +207,7 @@ src		addr4
 start		1
 count		5
 src_delta	2000
-tools		sendip nc bash
+tools		sendip socat nc bash
 proto		udp
 
 race_repeat	3
@@ -227,7 +227,7 @@ src		addr6 port
 start		10
 count		5
 src_delta	2000
-tools		sendip nc
+tools		sendip socat nc
 proto		udp6
 
 race_repeat	3
@@ -247,7 +247,7 @@ src		mac proto addr4
 start		1
 count		5
 src_delta	2000
-tools		sendip nc bash
+tools		sendip socat nc bash
 proto		udp
 
 race_repeat	0
@@ -264,7 +264,7 @@ src		mac
 start		1
 count		5
 src_delta	2000
-tools		sendip nc bash
+tools		sendip socat nc bash
 proto		udp
 
 race_repeat	0
@@ -277,6 +277,23 @@ perf_entries	1000
 perf_proto	ipv4
 "
 
+TYPE_mac_net="
+display		mac,net
+type_spec	ether_addr . ipv4_addr
+chain_spec	ether saddr . ip saddr
+dst		 
+src		mac addr4
+start		1
+count		5
+src_delta	2000
+tools		sendip socat nc bash
+proto		udp
+
+race_repeat	0
+
+perf_duration	0
+"
+
 TYPE_net_mac_icmp="
 display		net,mac - ICMP
 type_spec	ipv4_addr . ether_addr
@@ -320,7 +337,7 @@ src		addr4
 start		1
 count		5
 src_delta	2000
-tools		sendip nc
+tools		sendip socat nc
 proto		udp
 
 race_repeat	3
@@ -337,6 +354,23 @@ TYPE_flush_remove_add="
 display		Add two elements, flush, re-add
 "
 
+TYPE_reload="
+display		net,mac with reload
+type_spec	ipv4_addr . ether_addr
+chain_spec	ip daddr . ether saddr
+dst		addr4
+src		mac
+start		1
+count		1
+src_delta	2000
+tools		sendip socat nc bash
+proto		udp
+
+race_repeat	0
+
+perf_duration	0
+"
+
 # Set template for all tests, types and rules are filled in depending on test
 set_template='
 flush ruleset
@@ -507,6 +541,24 @@ setup_send_udp() {
 			dst_port=
 			src_addr4=
 		}
+	elif command -v socat -v >/dev/null; then
+		send_udp() {
+			if [ -n "${src_addr4}" ]; then
+				B ip addr add "${src_addr4}" dev veth_b
+				__socatbind=",bind=${src_addr4}"
+				if [ -n "${src_port}" ];then
+					__socatbind="${__socatbind}:${src_port}"
+				fi
+			fi
+
+			ip addr add "${dst_addr4}" dev veth_a 2>/dev/null
+			[ -z "${dst_port}" ] && dst_port=12345
+
+			echo "test4" | B socat -t 0.01 STDIN UDP4-DATAGRAM:${dst_addr4}:${dst_port}"${__socatbind}"
+
+			src_addr4=
+			src_port=
+		}
 	elif command -v nc >/dev/null; then
 		if nc -u -w0 1.1.1.1 1 2>/dev/null; then
 			# OpenBSD netcat
@@ -572,6 +624,29 @@ setup_send_udp6() {
 			dst_port=
 			src_addr6=
 		}
+	elif command -v socat -v >/dev/null; then
+		send_udp6() {
+			ip -6 addr add "${dst_addr6}" dev veth_a nodad \
+				2>/dev/null
+
+			__socatbind6=
+
+			if [ -n "${src_addr6}" ]; then
+				if [ -n "${src_addr6} != "${src_addr6_added} ]; then
+					B ip addr add "${src_addr6}" dev veth_b nodad
+
+					src_addr6_added=${src_addr6}
+				fi
+
+				__socatbind6=",bind=[${src_addr6}]"
+
+				if [ -n "${src_port}" ] ;then
+					__socatbind6="${__socatbind6}:${src_port}"
+				fi
+			fi
+
+			echo "test6" | B socat -t 0.01 STDIN UDP6-DATAGRAM:[${dst_addr6}]:${dst_port}"${__socatbind6}"
+		}
 	elif command -v nc >/dev/null && nc -u -w0 1.1.1.1 1 2>/dev/null; then
 		# GNU netcat might not work with IPv6, try next tool
 		send_udp6() {
@@ -984,7 +1059,8 @@ format() {
 		fi
 	done
 	for f in ${src}; do
-		__expr="${__expr} . "
+		[ "${__expr}" != "{ " ] && __expr="${__expr} . "
+
 		__start="$(eval format_"${f}" "${srcstart}")"
 		__end="$(eval format_"${f}" "${srcend}")"
 
@@ -1455,6 +1531,59 @@ test_bug_flush_remove_add() {
 	nft flush ruleset
 }
 
+# - add ranged element, check that packets match it
+# - reload the set, check packets still match
+test_bug_reload() {
+	setup veth send_"${proto}" set || return ${KSELFTEST_SKIP}
+	rstart=${start}
+
+	range_size=1
+	for i in $(seq "${start}" $((start + count))); do
+		end=$((start + range_size))
+
+		# Avoid negative or zero-sized port ranges
+		if [ $((end / 65534)) -gt $((start / 65534)) ]; then
+			start=${end}
+			end=$((end + 1))
+		fi
+		srcstart=$((start + src_delta))
+		srcend=$((end + src_delta))
+
+		add "$(format)" || return 1
+		range_size=$((range_size + 1))
+		start=$((end + range_size))
+	done
+
+	# check kernel does allocate pcpu sctrach map
+	# for reload with no elemet add/delete
+	( echo flush set inet filter test ;
+	  nft list set inet filter test ) | nft -f -
+
+	start=${rstart}
+	range_size=1
+
+	for i in $(seq "${start}" $((start + count))); do
+		end=$((start + range_size))
+
+		# Avoid negative or zero-sized port ranges
+		if [ $((end / 65534)) -gt $((start / 65534)) ]; then
+			start=${end}
+			end=$((end + 1))
+		fi
+		srcstart=$((start + src_delta))
+		srcend=$((end + src_delta))
+
+		for j in $(seq ${start} $((range_size / 2 + 1)) ${end}); do
+			send_match "${j}" $((j + src_delta)) || return 1
+		done
+
+		range_size=$((range_size + 1))
+		start=$((end + range_size))
+	done
+
+	nft flush ruleset
+}
+
 test_reported_issues() {
 	eval test_bug_"${subtest}"
 }
@@ -1513,4 +1642,4 @@ for name in ${TESTS}; do
 	done
 done
 
-[ ${passed} -eq 0 ] && exit ${KSELFTEST_SKIP}
+[ ${passed} -eq 0 ] && exit ${KSELFTEST_SKIP} || exit 0
diff --git a/tools/testing/selftests/netfilter/nft_conntrack_helper.sh b/tools/testing/selftests/netfilter/nft_conntrack_helper.sh
index edf0a48da6bf..faa7778d7bd1 100755
--- a/tools/testing/selftests/netfilter/nft_conntrack_helper.sh
+++ b/tools/testing/selftests/netfilter/nft_conntrack_helper.sh
@@ -94,28 +94,50 @@ check_for_helper()
 	local message=$2
 	local port=$3
 
-	ip netns exec ${netns} conntrack -L -p tcp --dport $port 2> /dev/null |grep -q 'helper=ftp'
+	if echo $message |grep -q 'ipv6';then
+		local family="ipv6"
+	else
+		local family="ipv4"
+	fi
+
+	ip netns exec ${netns} conntrack -L -f $family -p tcp --dport $port 2> /dev/null |grep -q 'helper=ftp'
 	if [ $? -ne 0 ] ; then
-		echo "FAIL: ${netns} did not show attached helper $message" 1>&2
-		ret=1
+		if [ $autoassign -eq 0 ] ;then
+			echo "FAIL: ${netns} did not show attached helper $message" 1>&2
+			ret=1
+		else
+			echo "PASS: ${netns} did not show attached helper $message" 1>&2
+		fi
+	else
+		if [ $autoassign -eq 0 ] ;then
+			echo "PASS: ${netns} connection on port $port has ftp helper attached" 1>&2
+		else
+			echo "FAIL: ${netns} connection on port $port has ftp helper attached" 1>&2
+			ret=1
+		fi
 	fi
 
-	echo "PASS: ${netns} connection on port $port has ftp helper attached" 1>&2
 	return 0
 }
 
 test_helper()
 {
 	local port=$1
-	local msg=$2
+	local autoassign=$2
+
+	if [ $autoassign -eq 0 ] ;then
+		msg="set via ruleset"
+	else
+		msg="auto-assign"
+	fi
 
 	sleep 3 | ip netns exec ${ns2} nc -w 2 -l -p $port > /dev/null &
 
-	sleep 1
 	sleep 1 | ip netns exec ${ns1} nc -w 2 10.0.1.2 $port > /dev/null &
+	sleep 1
 
-	check_for_helper "$ns1" "ip $msg" $port
-	check_for_helper "$ns2" "ip $msg" $port
+	check_for_helper "$ns1" "ip $msg" $port $autoassign
+	check_for_helper "$ns2" "ip $msg" $port $autoassign
 
 	wait
 
@@ -128,8 +150,8 @@ test_helper()
 
 	sleep 3 | ip netns exec ${ns2} nc -w 2 -6 -l -p $port > /dev/null &
 
-	sleep 1
 	sleep 1 | ip netns exec ${ns1} nc -w 2 -6 dead:1::2 $port > /dev/null &
+	sleep 1
 
 	check_for_helper "$ns1" "ipv6 $msg" $port
 	check_for_helper "$ns2" "ipv6 $msg" $port
@@ -167,9 +189,9 @@ if [ $? -ne 0 ];then
 	fi
 fi
 
-test_helper 2121 "set via ruleset"
-ip netns exec ${ns1} sysctl -q 'net.netfilter.nf_conntrack_helper=1'
-ip netns exec ${ns2} sysctl -q 'net.netfilter.nf_conntrack_helper=1'
-test_helper 21 "auto-assign"
+test_helper 2121 0
+ip netns exec ${ns1} sysctl -qe 'net.netfilter.nf_conntrack_helper=1'
+ip netns exec ${ns2} sysctl -qe 'net.netfilter.nf_conntrack_helper=1'
+test_helper 21 1
 
 exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_fib.sh b/tools/testing/selftests/netfilter/nft_fib.sh
new file mode 100755
index 000000000000..dff476e45e77
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_fib.sh
@@ -0,0 +1,273 @@
+#!/bin/bash
+#
+# This tests the fib expression.
+#
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns1="ns1-$sfx"
+ns2="ns2-$sfx"
+nsrouter="nsrouter-$sfx"
+timeout=4
+
+log_netns=$(sysctl -n net.netfilter.nf_log_all_netns)
+
+cleanup()
+{
+	ip netns del ${ns1}
+	ip netns del ${ns2}
+	ip netns del ${nsrouter}
+
+	[ $log_netns -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns=$log_netns
+}
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without nft tool"
+	exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+ip netns add ${nsrouter}
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not create net namespace"
+	exit $ksft_skip
+fi
+
+trap cleanup EXIT
+
+dmesg | grep -q ' nft_rpfilter: '
+if [ $? -eq 0 ]; then
+	dmesg -c | grep ' nft_rpfilter: '
+	echo "WARN: a previous test run has failed" 1>&2
+fi
+
+sysctl -q net.netfilter.nf_log_all_netns=1
+ip netns add ${ns1}
+ip netns add ${ns2}
+
+load_ruleset() {
+	local netns=$1
+
+ip netns exec ${netns} nft -f /dev/stdin <<EOF
+table inet filter {
+	chain prerouting {
+		type filter hook prerouting priority 0; policy accept;
+	        fib saddr . iif oif missing counter log prefix "$netns nft_rpfilter: " drop
+	}
+}
+EOF
+}
+
+load_pbr_ruleset() {
+	local netns=$1
+
+ip netns exec ${netns} nft -f /dev/stdin <<EOF
+table inet filter {
+	chain forward {
+		type filter hook forward priority raw;
+		fib saddr . iif oif gt 0 accept
+		log drop
+	}
+}
+EOF
+}
+
+load_ruleset_count() {
+	local netns=$1
+
+ip netns exec ${netns} nft -f /dev/stdin <<EOF
+table inet filter {
+	chain prerouting {
+		type filter hook prerouting priority 0; policy accept;
+		ip daddr 1.1.1.1 fib saddr . iif oif missing counter drop
+		ip6 daddr 1c3::c01d fib saddr . iif oif missing counter drop
+	}
+}
+EOF
+}
+
+check_drops() {
+	dmesg | grep -q ' nft_rpfilter: '
+	if [ $? -eq 0 ]; then
+		dmesg | grep ' nft_rpfilter: '
+		echo "FAIL: rpfilter did drop packets"
+		return 1
+	fi
+
+	return 0
+}
+
+check_fib_counter() {
+	local want=$1
+	local ns=$2
+	local address=$3
+
+	line=$(ip netns exec ${ns} nft list table inet filter | grep 'fib saddr . iif' | grep $address | grep "packets $want" )
+	ret=$?
+
+	if [ $ret -ne 0 ];then
+		echo "Netns $ns fib counter doesn't match expected packet count of $want for $address" 1>&2
+		ip netns exec ${ns} nft list table inet filter
+		return 1
+	fi
+
+	if [ $want -gt 0 ]; then
+		echo "PASS: fib expression did drop packets for $address"
+	fi
+
+	return 0
+}
+
+load_ruleset ${nsrouter}
+load_ruleset ${ns1}
+load_ruleset ${ns2}
+
+ip link add veth0 netns ${nsrouter} type veth peer name eth0 netns ${ns1} > /dev/null 2>&1
+if [ $? -ne 0 ];then
+    echo "SKIP: No virtual ethernet pair device support in kernel"
+    exit $ksft_skip
+fi
+ip link add veth1 netns ${nsrouter} type veth peer name eth0 netns ${ns2}
+
+ip -net ${nsrouter} link set lo up
+ip -net ${nsrouter} link set veth0 up
+ip -net ${nsrouter} addr add 10.0.1.1/24 dev veth0
+ip -net ${nsrouter} addr add dead:1::1/64 dev veth0
+
+ip -net ${nsrouter} link set veth1 up
+ip -net ${nsrouter} addr add 10.0.2.1/24 dev veth1
+ip -net ${nsrouter} addr add dead:2::1/64 dev veth1
+
+ip -net ${ns1} link set lo up
+ip -net ${ns1} link set eth0 up
+
+ip -net ${ns2} link set lo up
+ip -net ${ns2} link set eth0 up
+
+ip -net ${ns1} addr add 10.0.1.99/24 dev eth0
+ip -net ${ns1} addr add dead:1::99/64 dev eth0
+ip -net ${ns1} route add default via 10.0.1.1
+ip -net ${ns1} route add default via dead:1::1
+
+ip -net ${ns2} addr add 10.0.2.99/24 dev eth0
+ip -net ${ns2} addr add dead:2::99/64 dev eth0
+ip -net ${ns2} route add default via 10.0.2.1
+ip -net ${ns2} route add default via dead:2::1
+
+test_ping() {
+  local daddr4=$1
+  local daddr6=$2
+
+  ip netns exec ${ns1} ping -c 1 -q $daddr4 > /dev/null
+  ret=$?
+  if [ $ret -ne 0 ];then
+	check_drops
+	echo "FAIL: ${ns1} cannot reach $daddr4, ret $ret" 1>&2
+	return 1
+  fi
+
+  ip netns exec ${ns1} ping -c 3 -q $daddr6 > /dev/null
+  ret=$?
+  if [ $ret -ne 0 ];then
+	check_drops
+	echo "FAIL: ${ns1} cannot reach $daddr6, ret $ret" 1>&2
+	return 1
+  fi
+
+  return 0
+}
+
+ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+ip netns exec ${nsrouter} sysctl net.ipv4.conf.all.rp_filter=0 > /dev/null
+ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.rp_filter=0 > /dev/null
+
+sleep 3
+
+test_ping 10.0.2.1 dead:2::1 || exit 1
+check_drops || exit 1
+
+test_ping 10.0.2.99 dead:2::99 || exit 1
+check_drops || exit 1
+
+echo "PASS: fib expression did not cause unwanted packet drops"
+
+ip netns exec ${nsrouter} nft flush table inet filter
+
+ip -net ${ns1} route del default
+ip -net ${ns1} -6 route del default
+
+ip -net ${ns1} addr del 10.0.1.99/24 dev eth0
+ip -net ${ns1} addr del dead:1::99/64 dev eth0
+
+ip -net ${ns1} addr add 10.0.2.99/24 dev eth0
+ip -net ${ns1} addr add dead:2::99/64 dev eth0
+
+ip -net ${ns1} route add default via 10.0.2.1
+ip -net ${ns1} -6 route add default via dead:2::1
+
+ip -net ${nsrouter} addr add dead:2::1/64 dev veth0
+
+# switch to ruleset that doesn't log, this time
+# its expected that this does drop the packets.
+load_ruleset_count ${nsrouter}
+
+# ns1 has a default route, but nsrouter does not.
+# must not check return value, ping to 1.1.1.1 will
+# fail.
+check_fib_counter 0 ${nsrouter} 1.1.1.1 || exit 1
+check_fib_counter 0 ${nsrouter} 1c3::c01d || exit 1
+
+ip netns exec ${ns1} ping -c 1 -W 1 -q 1.1.1.1 > /dev/null
+check_fib_counter 1 ${nsrouter} 1.1.1.1 || exit 1
+
+sleep 2
+ip netns exec ${ns1} ping -c 3 -q 1c3::c01d > /dev/null
+check_fib_counter 3 ${nsrouter} 1c3::c01d || exit 1
+
+# delete all rules
+ip netns exec ${ns1} nft flush ruleset
+ip netns exec ${ns2} nft flush ruleset
+ip netns exec ${nsrouter} nft flush ruleset
+
+ip -net ${ns1} addr add 10.0.1.99/24 dev eth0
+ip -net ${ns1} addr add dead:1::99/64 dev eth0
+
+ip -net ${ns1} addr del 10.0.2.99/24 dev eth0
+ip -net ${ns1} addr del dead:2::99/64 dev eth0
+
+ip -net ${nsrouter} addr del dead:2::1/64 dev veth0
+
+# ... pbr ruleset for the router, check iif+oif.
+load_pbr_ruleset ${nsrouter}
+if [ $? -ne 0 ] ; then
+	echo "SKIP: Could not load fib forward ruleset"
+	exit $ksft_skip
+fi
+
+ip -net ${nsrouter} rule add from all table 128
+ip -net ${nsrouter} rule add from all iif veth0 table 129
+ip -net ${nsrouter} route add table 128 to 10.0.1.0/24 dev veth0
+ip -net ${nsrouter} route add table 129 to 10.0.2.0/24 dev veth1
+
+# drop main ipv4 table
+ip -net ${nsrouter} -4 rule delete table main
+
+test_ping 10.0.2.99 dead:2::99
+if [ $? -ne 0 ] ; then
+	ip -net ${nsrouter} nft list ruleset
+	echo "FAIL: fib mismatch in pbr setup"
+	exit 1
+fi
+
+echo "PASS: fib expression forward check with policy based routing"
+exit 0
diff --git a/tools/testing/selftests/netfilter/nft_flowtable.sh b/tools/testing/selftests/netfilter/nft_flowtable.sh
index 431296c0f91c..a32f490f7539 100755
--- a/tools/testing/selftests/netfilter/nft_flowtable.sh
+++ b/tools/testing/selftests/netfilter/nft_flowtable.sh
@@ -14,13 +14,17 @@
 # nft_flowtable.sh -o8000 -l1500 -r2000
 #
 
+sfx=$(mktemp -u "XXXXXXXX")
+ns1="ns1-$sfx"
+ns2="ns2-$sfx"
+nsr1="nsr1-$sfx"
+nsr2="nsr2-$sfx"
 
 # Kselftest framework requirement - SKIP code is 4.
 ksft_skip=4
 ret=0
 
-ns1in=""
-ns2in=""
+nsin=""
 ns1out=""
 ns2out=""
 
@@ -36,21 +40,19 @@ checktool (){
 checktool "nft --version" "run test without nft tool"
 checktool "ip -Version" "run test without ip tool"
 checktool "which nc" "run test without nc (netcat)"
-checktool "ip netns add nsr1" "create net namespace"
+checktool "ip netns add $nsr1" "create net namespace $nsr1"
 
-ip netns add ns1
-ip netns add ns2
-
-ip netns add nsr2
+ip netns add $ns1
+ip netns add $ns2
+ip netns add $nsr2
 
 cleanup() {
-	for i in 1 2; do
-		ip netns del ns$i
-		ip netns del nsr$i
-	done
+	ip netns del $ns1
+	ip netns del $ns2
+	ip netns del $nsr1
+	ip netns del $nsr2
 
-	rm -f "$ns1in" "$ns1out"
-	rm -f "$ns2in" "$ns2out"
+	rm -f "$nsin" "$ns1out" "$ns2out"
 
 	[ $log_netns -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns=$log_netns
 }
@@ -59,22 +61,21 @@ trap cleanup EXIT
 
 sysctl -q net.netfilter.nf_log_all_netns=1
 
-ip link add veth0 netns nsr1 type veth peer name eth0 netns ns1
-ip link add veth1 netns nsr1 type veth peer name veth0 netns nsr2
+ip link add veth0 netns $nsr1 type veth peer name eth0 netns $ns1
+ip link add veth1 netns $nsr1 type veth peer name veth0 netns $nsr2
 
-ip link add veth1 netns nsr2 type veth peer name eth0 netns ns2
+ip link add veth1 netns $nsr2 type veth peer name eth0 netns $ns2
 
 for dev in lo veth0 veth1; do
-  for i in 1 2; do
-    ip -net nsr$i link set $dev up
-  done
+    ip -net $nsr1 link set $dev up
+    ip -net $nsr2 link set $dev up
 done
 
-ip -net nsr1 addr add 10.0.1.1/24 dev veth0
-ip -net nsr1 addr add dead:1::1/64 dev veth0
+ip -net $nsr1 addr add 10.0.1.1/24 dev veth0
+ip -net $nsr1 addr add dead:1::1/64 dev veth0
 
-ip -net nsr2 addr add 10.0.2.1/24 dev veth1
-ip -net nsr2 addr add dead:2::1/64 dev veth1
+ip -net $nsr2 addr add 10.0.2.1/24 dev veth1
+ip -net $nsr2 addr add dead:2::1/64 dev veth1
 
 # set different MTUs so we need to push packets coming from ns1 (large MTU)
 # to ns2 (smaller MTU) to stack either to perform fragmentation (ip_no_pmtu_disc=1),
@@ -106,85 +107,76 @@ do
 	esac
 done
 
-if ! ip -net nsr1 link set veth0 mtu $omtu; then
+if ! ip -net $nsr1 link set veth0 mtu $omtu; then
 	exit 1
 fi
 
-ip -net ns1 link set eth0 mtu $omtu
+ip -net $ns1 link set eth0 mtu $omtu
 
-if ! ip -net nsr2 link set veth1 mtu $rmtu; then
+if ! ip -net $nsr2 link set veth1 mtu $rmtu; then
 	exit 1
 fi
 
-ip -net ns2 link set eth0 mtu $rmtu
+ip -net $ns2 link set eth0 mtu $rmtu
 
 # transfer-net between nsr1 and nsr2.
 # these addresses are not used for connections.
-ip -net nsr1 addr add 192.168.10.1/24 dev veth1
-ip -net nsr1 addr add fee1:2::1/64 dev veth1
-
-ip -net nsr2 addr add 192.168.10.2/24 dev veth0
-ip -net nsr2 addr add fee1:2::2/64 dev veth0
-
-for i in 1 2; do
-  ip netns exec nsr$i sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
-  ip netns exec nsr$i sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
-
-  ip -net ns$i link set lo up
-  ip -net ns$i link set eth0 up
-  ip -net ns$i addr add 10.0.$i.99/24 dev eth0
-  ip -net ns$i route add default via 10.0.$i.1
-  ip -net ns$i addr add dead:$i::99/64 dev eth0
-  ip -net ns$i route add default via dead:$i::1
-  if ! ip netns exec ns$i sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null; then
+ip -net $nsr1 addr add 192.168.10.1/24 dev veth1
+ip -net $nsr1 addr add fee1:2::1/64 dev veth1
+
+ip -net $nsr2 addr add 192.168.10.2/24 dev veth0
+ip -net $nsr2 addr add fee1:2::2/64 dev veth0
+
+for i in 0 1; do
+  ip netns exec $nsr1 sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null
+  ip netns exec $nsr2 sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null
+done
+
+for ns in $ns1 $ns2;do
+  ip -net $ns link set lo up
+  ip -net $ns link set eth0 up
+
+  if ! ip netns exec $ns sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null; then
 	echo "ERROR: Check Originator/Responder values (problem during address addition)"
 	exit 1
   fi
-
   # don't set ip DF bit for first two tests
-  ip netns exec ns$i sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null
+  ip netns exec $ns sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null
 done
 
-ip -net nsr1 route add default via 192.168.10.2
-ip -net nsr2 route add default via 192.168.10.1
+ip -net $ns1 addr add 10.0.1.99/24 dev eth0
+ip -net $ns2 addr add 10.0.2.99/24 dev eth0
+ip -net $ns1 route add default via 10.0.1.1
+ip -net $ns2 route add default via 10.0.2.1
+ip -net $ns1 addr add dead:1::99/64 dev eth0
+ip -net $ns2 addr add dead:2::99/64 dev eth0
+ip -net $ns1 route add default via dead:1::1
+ip -net $ns2 route add default via dead:2::1
+
+ip -net $nsr1 route add default via 192.168.10.2
+ip -net $nsr2 route add default via 192.168.10.1
 
-ip netns exec nsr1 nft -f - <<EOF
+ip netns exec $nsr1 nft -f - <<EOF
 table inet filter {
   flowtable f1 {
      hook ingress priority 0
      devices = { veth0, veth1 }
    }
 
+   counter routed_orig { }
+   counter routed_repl { }
+
    chain forward {
       type filter hook forward priority 0; policy drop;
 
       # flow offloaded? Tag ct with mark 1, so we can detect when it fails.
-      meta oif "veth1" tcp dport 12345 flow offload @f1 counter
-
-      # use packet size to trigger 'should be offloaded by now'.
-      # otherwise, if 'flow offload' expression never offloads, the
-      # test will pass.
-      tcp dport 12345 meta length gt 200 ct mark set 1 counter
+      meta oif "veth1" tcp dport 12345 ct mark set 1 flow add @f1 counter name routed_orig accept
 
-      # this turns off flow offloading internally, so expect packets again
-      tcp flags fin,rst ct mark set 0 accept
-
-      # this allows large packets from responder, we need this as long
-      # as PMTUd is off.
-      # This rule is deleted for the last test, when we expect PMTUd
-      # to kick in and ensure all packets meet mtu requirements.
-      meta length gt $lmtu accept comment something-to-grep-for
-
-      # next line blocks connection w.o. working offload.
-      # we only do this for reverse dir, because we expect packets to
-      # enter slow path due to MTU mismatch of veth0 and veth1.
-      tcp sport 12345 ct mark 1 counter log prefix "mark failure " drop
+      # count packets supposedly offloaded as per direction.
+      ct mark 1 counter name ct direction map { original : routed_orig, reply : routed_repl } accept
 
       ct state established,related accept
 
-      # for packets that we can't offload yet, i.e. SYN (any ct that is not confirmed)
-      meta length lt 200 oif "veth1" tcp dport 12345 counter accept
-
       meta nfproto ipv4 meta l4proto icmp accept
       meta nfproto ipv6 meta l4proto icmpv6 accept
    }
@@ -196,32 +188,51 @@ if [ $? -ne 0 ]; then
 	exit $ksft_skip
 fi
 
+ip netns exec $ns2 nft -f - <<EOF
+table inet filter {
+   counter ip4dscp0 { }
+   counter ip4dscp3 { }
+
+   chain input {
+      type filter hook input priority 0; policy accept;
+      meta l4proto tcp goto {
+	      ip dscp cs3 counter name ip4dscp3 accept
+	      ip dscp 0 counter name ip4dscp0 accept
+      }
+   }
+}
+EOF
+
+if [ $? -ne 0 ]; then
+	echo "SKIP: Could not load nft ruleset"
+	exit $ksft_skip
+fi
+
 # test basic connectivity
-if ! ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then
-  echo "ERROR: ns1 cannot reach ns2" 1>&2
-  bash
+if ! ip netns exec $ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then
+  echo "ERROR: $ns1 cannot reach ns2" 1>&2
   exit 1
 fi
 
-if ! ip netns exec ns2 ping -c 1 -q 10.0.1.99 > /dev/null; then
-  echo "ERROR: ns2 cannot reach ns1" 1>&2
+if ! ip netns exec $ns2 ping -c 1 -q 10.0.1.99 > /dev/null; then
+  echo "ERROR: $ns2 cannot reach $ns1" 1>&2
   exit 1
 fi
 
 if [ $ret -eq 0 ];then
-	echo "PASS: netns routing/connectivity: ns1 can reach ns2"
+	echo "PASS: netns routing/connectivity: $ns1 can reach $ns2"
 fi
 
-ns1in=$(mktemp)
+nsin=$(mktemp)
 ns1out=$(mktemp)
-ns2in=$(mktemp)
 ns2out=$(mktemp)
 
 make_file()
 {
 	name=$1
 
-	SIZE=$((RANDOM % (1024 * 8)))
+	SIZE=$((RANDOM % (1024 * 128)))
+	SIZE=$((SIZE + (1024 * 8)))
 	TSIZE=$((SIZE * 1024))
 
 	dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null
@@ -232,6 +243,92 @@ make_file()
 	dd if=/dev/urandom conf=notrunc of="$name" bs=1 count=$SIZE 2> /dev/null
 }
 
+check_counters()
+{
+	local what=$1
+	local ok=1
+
+	local orig=$(ip netns exec $nsr1 nft reset counter inet filter routed_orig | grep packets)
+	local repl=$(ip netns exec $nsr1 nft reset counter inet filter routed_repl | grep packets)
+
+	local orig_cnt=${orig#*bytes}
+	local repl_cnt=${repl#*bytes}
+
+	local fs=$(du -sb $nsin)
+	local max_orig=${fs%%/*}
+	local max_repl=$((max_orig/4))
+
+	if [ $orig_cnt -gt $max_orig ];then
+		echo "FAIL: $what: original counter $orig_cnt exceeds expected value $max_orig" 1>&2
+		ret=1
+		ok=0
+	fi
+
+	if [ $repl_cnt -gt $max_repl ];then
+		echo "FAIL: $what: reply counter $repl_cnt exceeds expected value $max_repl" 1>&2
+		ret=1
+		ok=0
+	fi
+
+	if [ $ok -eq 1 ]; then
+		echo "PASS: $what"
+	fi
+}
+
+check_dscp()
+{
+	local what=$1
+	local ok=1
+
+	local counter=$(ip netns exec $ns2 nft reset counter inet filter ip4dscp3 | grep packets)
+
+	local pc4=${counter%*bytes*}
+	local pc4=${pc4#*packets}
+
+	local counter=$(ip netns exec $ns2 nft reset counter inet filter ip4dscp0 | grep packets)
+	local pc4z=${counter%*bytes*}
+	local pc4z=${pc4z#*packets}
+
+	case "$what" in
+	"dscp_none")
+		if [ $pc4 -gt 0 ] || [ $pc4z -eq 0 ]; then
+			echo "FAIL: dscp counters do not match, expected dscp3 == 0, dscp0 > 0, but got $pc4,$pc4z" 1>&2
+			ret=1
+			ok=0
+		fi
+		;;
+	"dscp_fwd")
+		if [ $pc4 -eq 0 ] || [ $pc4z -eq 0 ]; then
+			echo "FAIL: dscp counters do not match, expected dscp3 and dscp0 > 0 but got $pc4,$pc4z" 1>&2
+			ret=1
+			ok=0
+		fi
+		;;
+	"dscp_ingress")
+		if [ $pc4 -eq 0 ] || [ $pc4z -gt 0 ]; then
+			echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
+			ret=1
+			ok=0
+		fi
+		;;
+	"dscp_egress")
+		if [ $pc4 -eq 0 ] || [ $pc4z -gt 0 ]; then
+			echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
+			ret=1
+			ok=0
+		fi
+		;;
+	*)
+		echo "FAIL: Unknown DSCP check" 1>&2
+		ret=1
+		ok=0
+	esac
+
+	if [ $ok -eq 1 ] ;then
+		echo "PASS: $what: dscp packet counters match"
+	fi
+}
+
 check_transfer()
 {
 	in=$1
@@ -256,30 +353,39 @@ test_tcp_forwarding_ip()
 	local dstport=$4
 	local lret=0
 
-	ip netns exec $nsb nc -w 5 -l -p 12345 < "$ns2in" > "$ns2out" &
+	ip netns exec $nsb nc -w 5 -l -p 12345 < "$nsin" > "$ns2out" &
 	lpid=$!
 
 	sleep 1
-	ip netns exec $nsa nc -w 4 "$dstip" "$dstport" < "$ns1in" > "$ns1out" &
+	ip netns exec $nsa nc -w 4 "$dstip" "$dstport" < "$nsin" > "$ns1out" &
 	cpid=$!
 
-	sleep 3
+	sleep 1
+
+	prev="$(ls -l $ns1out $ns2out)"
+	sleep 1
+
+	while [[ "$prev" != "$(ls -l $ns1out $ns2out)" ]]; do
+		sleep 1;
+		prev="$(ls -l $ns1out $ns2out)"
+	done
 
-	if ps -p $lpid > /dev/null;then
+	if test -d /proc/"$lpid"/; then
 		kill $lpid
 	fi
 
-	if ps -p $cpid > /dev/null;then
+	if test -d /proc/"$cpid"/; then
 		kill $cpid
 	fi
 
-	wait
+	wait $lpid
+	wait $cpid
 
-	if ! check_transfer "$ns1in" "$ns2out" "ns1 -> ns2"; then
+	if ! check_transfer "$nsin" "$ns2out" "ns1 -> ns2"; then
 		lret=1
 	fi
 
-	if ! check_transfer "$ns2in" "$ns1out" "ns1 <- ns2"; then
+	if ! check_transfer "$nsin" "$ns1out" "ns1 <- ns2"; then
 		lret=1
 	fi
 
@@ -293,44 +399,107 @@ test_tcp_forwarding()
 	return $?
 }
 
+test_tcp_forwarding_set_dscp()
+{
+	check_dscp "dscp_none"
+
+ip netns exec $nsr1 nft -f - <<EOF
+table netdev dscpmangle {
+   chain setdscp0 {
+      type filter hook ingress device "veth0" priority 0; policy accept
+	ip dscp set cs3
+  }
+}
+EOF
+if [ $? -eq 0 ]; then
+	test_tcp_forwarding_ip "$1" "$2"  10.0.2.99 12345
+	check_dscp "dscp_ingress"
+
+	ip netns exec $nsr1 nft delete table netdev dscpmangle
+else
+	echo "SKIP: Could not load netdev:ingress for veth0"
+fi
+
+ip netns exec $nsr1 nft -f - <<EOF
+table netdev dscpmangle {
+   chain setdscp0 {
+      type filter hook egress device "veth1" priority 0; policy accept
+      ip dscp set cs3
+  }
+}
+EOF
+if [ $? -eq 0 ]; then
+	test_tcp_forwarding_ip "$1" "$2"  10.0.2.99 12345
+	check_dscp "dscp_egress"
+
+	ip netns exec $nsr1 nft flush table netdev dscpmangle
+else
+	echo "SKIP: Could not load netdev:egress for veth1"
+fi
+
+	# partial.  If flowtable really works, then both dscp-is-0 and dscp-is-cs3
+	# counters should have seen packets (before and after ft offload kicks in).
+	ip netns exec $nsr1 nft -a insert rule inet filter forward ip dscp set cs3
+	test_tcp_forwarding_ip "$1" "$2"  10.0.2.99 12345
+	check_dscp "dscp_fwd"
+}
+
 test_tcp_forwarding_nat()
 {
 	local lret
+	local pmtu
 
 	test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
 	lret=$?
 
+	pmtu=$3
+	what=$4
+
 	if [ $lret -eq 0 ] ; then
+		if [ $pmtu -eq 1 ] ;then
+			check_counters "flow offload for ns1/ns2 with masquerade and pmtu discovery $what"
+		else
+			echo "PASS: flow offload for ns1/ns2 with masquerade $what"
+		fi
+
 		test_tcp_forwarding_ip "$1" "$2" 10.6.6.6 1666
 		lret=$?
+		if [ $pmtu -eq 1 ] ;then
+			check_counters "flow offload for ns1/ns2 with dnat and pmtu discovery $what"
+		elif [ $lret -eq 0 ] ; then
+			echo "PASS: flow offload for ns1/ns2 with dnat $what"
+		fi
 	fi
 
 	return $lret
 }
 
-make_file "$ns1in"
-make_file "$ns2in"
+make_file "$nsin"
 
 # First test:
 # No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed.
-if test_tcp_forwarding ns1 ns2; then
+# Due to MTU mismatch in both directions, all packets (except small packets like pure
+# acks) have to be handled by normal forwarding path.  Therefore, packet counters
+# are not checked.
+if test_tcp_forwarding $ns1 $ns2; then
 	echo "PASS: flow offloaded for ns1/ns2"
 else
 	echo "FAIL: flow offload for ns1/ns2:" 1>&2
-	ip netns exec nsr1 nft list ruleset
+	ip netns exec $nsr1 nft list ruleset
 	ret=1
 fi
 
 # delete default route, i.e. ns2 won't be able to reach ns1 and
 # will depend on ns1 being masqueraded in nsr1.
 # expect ns1 has nsr1 address.
-ip -net ns2 route del default via 10.0.2.1
-ip -net ns2 route del default via dead:2::1
-ip -net ns2 route add 192.168.10.1 via 10.0.2.1
+ip -net $ns2 route del default via 10.0.2.1
+ip -net $ns2 route del default via dead:2::1
+ip -net $ns2 route add 192.168.10.1 via 10.0.2.1
 
 # Second test:
-# Same, but with NAT enabled.
-ip netns exec nsr1 nft -f - <<EOF
+# Same, but with NAT enabled.  Same as in first test: we expect normal forward path
+# to handle most packets.
+ip netns exec $nsr1 nft -f - <<EOF
 table ip nat {
    chain prerouting {
       type nat hook prerouting priority 0; policy accept;
@@ -344,35 +513,117 @@ table ip nat {
 }
 EOF
 
-if test_tcp_forwarding_nat ns1 ns2; then
-	echo "PASS: flow offloaded for ns1/ns2 with NAT"
-else
+if ! test_tcp_forwarding_set_dscp $ns1 $ns2 0 ""; then
+	echo "FAIL: flow offload for ns1/ns2 with dscp update" 1>&2
+	exit 0
+fi
+
+if ! test_tcp_forwarding_nat $ns1 $ns2 0 ""; then
 	echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2
-	ip netns exec nsr1 nft list ruleset
+	ip netns exec $nsr1 nft list ruleset
 	ret=1
 fi
 
 # Third test:
-# Same as second test, but with PMTU discovery enabled.
-handle=$(ip netns exec nsr1 nft -a list table inet filter | grep something-to-grep-for | cut -d \# -f 2)
+# Same as second test, but with PMTU discovery enabled. This
+# means that we expect the fastpath to handle packets as soon
+# as the endpoints adjust the packet size.
+ip netns exec $ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
+ip netns exec $ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
+
+# reset counters.
+# With pmtu in-place we'll also check that nft counters
+# are lower than file size and packets were forwarded via flowtable layer.
+# For earlier tests (large mtus), packets cannot be handled via flowtable
+# (except pure acks and other small packets).
+ip netns exec $nsr1 nft reset counters table inet filter >/dev/null
+
+if ! test_tcp_forwarding_nat $ns1 $ns2 1 ""; then
+	echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2
+	ip netns exec $nsr1 nft list ruleset
+fi
 
-if ! ip netns exec nsr1 nft delete rule inet filter forward $handle; then
-	echo "FAIL: Could not delete large-packet accept rule"
-	exit 1
+# Another test:
+# Add bridge interface br0 to Router1, with NAT enabled.
+ip -net $nsr1 link add name br0 type bridge
+ip -net $nsr1 addr flush dev veth0
+ip -net $nsr1 link set up dev veth0
+ip -net $nsr1 link set veth0 master br0
+ip -net $nsr1 addr add 10.0.1.1/24 dev br0
+ip -net $nsr1 addr add dead:1::1/64 dev br0
+ip -net $nsr1 link set up dev br0
+
+ip netns exec $nsr1 sysctl net.ipv4.conf.br0.forwarding=1 > /dev/null
+
+# br0 with NAT enabled.
+ip netns exec $nsr1 nft -f - <<EOF
+flush table ip nat
+table ip nat {
+   chain prerouting {
+      type nat hook prerouting priority 0; policy accept;
+      meta iif "br0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345
+   }
+
+   chain postrouting {
+      type nat hook postrouting priority 0; policy accept;
+      meta oifname "veth1" counter masquerade
+   }
+}
+EOF
+
+if ! test_tcp_forwarding_nat $ns1 $ns2 1 "on bridge"; then
+	echo "FAIL: flow offload for ns1/ns2 with bridge NAT" 1>&2
+	ip netns exec $nsr1 nft list ruleset
+	ret=1
 fi
 
-ip netns exec ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
-ip netns exec ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
 
-if test_tcp_forwarding_nat ns1 ns2; then
-	echo "PASS: flow offloaded for ns1/ns2 with NAT and pmtu discovery"
-else
-	echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2
-	ip netns exec nsr1 nft list ruleset
+# Another test:
+# Add bridge interface br0 to Router1, with NAT and VLAN.
+ip -net $nsr1 link set veth0 nomaster
+ip -net $nsr1 link set down dev veth0
+ip -net $nsr1 link add link veth0 name veth0.10 type vlan id 10
+ip -net $nsr1 link set up dev veth0
+ip -net $nsr1 link set up dev veth0.10
+ip -net $nsr1 link set veth0.10 master br0
+
+ip -net $ns1 addr flush dev eth0
+ip -net $ns1 link add link eth0 name eth0.10 type vlan id 10
+ip -net $ns1 link set eth0 up
+ip -net $ns1 link set eth0.10 up
+ip -net $ns1 addr add 10.0.1.99/24 dev eth0.10
+ip -net $ns1 route add default via 10.0.1.1
+ip -net $ns1 addr add dead:1::99/64 dev eth0.10
+
+if ! test_tcp_forwarding_nat $ns1 $ns2 1 "bridge and VLAN"; then
+	echo "FAIL: flow offload for ns1/ns2 with bridge NAT and VLAN" 1>&2
+	ip netns exec $nsr1 nft list ruleset
+	ret=1
 fi
 
-KEY_SHA="0x"$(ps -xaf | sha1sum | cut -d " " -f 1)
-KEY_AES="0x"$(ps -xaf | md5sum | cut -d " " -f 1)
+# restore test topology (remove bridge and VLAN)
+ip -net $nsr1 link set veth0 nomaster
+ip -net $nsr1 link set veth0 down
+ip -net $nsr1 link set veth0.10 down
+ip -net $nsr1 link delete veth0.10 type vlan
+ip -net $nsr1 link delete br0 type bridge
+ip -net $ns1 addr flush dev eth0.10
+ip -net $ns1 link set eth0.10 down
+ip -net $ns1 link set eth0 down
+ip -net $ns1 link delete eth0.10 type vlan
+
+# restore address in ns1 and nsr1
+ip -net $ns1 link set eth0 up
+ip -net $ns1 addr add 10.0.1.99/24 dev eth0
+ip -net $ns1 route add default via 10.0.1.1
+ip -net $ns1 addr add dead:1::99/64 dev eth0
+ip -net $ns1 route add default via dead:1::1
+ip -net $nsr1 addr add 10.0.1.1/24 dev veth0
+ip -net $nsr1 addr add dead:1::1/64 dev veth0
+ip -net $nsr1 link set up dev veth0
+
+KEY_SHA="0x"$(ps -af | sha1sum | cut -d " " -f 1)
+KEY_AES="0x"$(ps -af | md5sum | cut -d " " -f 1)
 SPI1=$RANDOM
 SPI2=$RANDOM
 
@@ -399,23 +650,23 @@ do_esp() {
 
 }
 
-do_esp nsr1 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2
+do_esp $nsr1 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2
 
-do_esp nsr2 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1
+do_esp $nsr2 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1
 
-ip netns exec nsr1 nft delete table ip nat
+ip netns exec $nsr1 nft delete table ip nat
 
 # restore default routes
-ip -net ns2 route del 192.168.10.1 via 10.0.2.1
-ip -net ns2 route add default via 10.0.2.1
-ip -net ns2 route add default via dead:2::1
+ip -net $ns2 route del 192.168.10.1 via 10.0.2.1
+ip -net $ns2 route add default via 10.0.2.1
+ip -net $ns2 route add default via dead:2::1
 
-if test_tcp_forwarding ns1 ns2; then
-	echo "PASS: ipsec tunnel mode for ns1/ns2"
+if test_tcp_forwarding $ns1 $ns2; then
+	check_counters "ipsec tunnel mode for ns1/ns2"
 else
 	echo "FAIL: ipsec tunnel mode for ns1/ns2"
-	ip netns exec nsr1 nft list ruleset 1>&2
-	ip netns exec nsr1 cat /proc/net/xfrm_stat 1>&2
+	ip netns exec $nsr1 nft list ruleset 1>&2
+	ip netns exec $nsr1 cat /proc/net/xfrm_stat 1>&2
 fi
 
 exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_meta.sh b/tools/testing/selftests/netfilter/nft_meta.sh
index 087f0e6e71ce..f33154c04d34 100755
--- a/tools/testing/selftests/netfilter/nft_meta.sh
+++ b/tools/testing/selftests/netfilter/nft_meta.sh
@@ -23,7 +23,7 @@ ip -net "$ns0" addr add 127.0.0.1 dev lo
 
 trap cleanup EXIT
 
-currentyear=$(date +%G)
+currentyear=$(date +%Y)
 lastyear=$((currentyear-1))
 ip netns exec "$ns0" nft -f /dev/stdin <<EOF
 table inet filter {
diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh
index d7e07f4c3d7f..dd40d9f6f259 100755
--- a/tools/testing/selftests/netfilter/nft_nat.sh
+++ b/tools/testing/selftests/netfilter/nft_nat.sh
@@ -374,6 +374,47 @@ EOF
 	return $lret
 }
 
+test_local_dnat_portonly()
+{
+	local family=$1
+	local daddr=$2
+	local lret=0
+	local sr_s
+	local sr_r
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family nat {
+	chain output {
+		type nat hook output priority 0; policy accept;
+		meta l4proto tcp dnat to :2000
+
+	}
+}
+EOF
+	if [ $? -ne 0 ]; then
+		if [ $family = "inet" ];then
+			echo "SKIP: inet port test"
+			test_inet_nat=false
+			return
+		fi
+		echo "SKIP: Could not add $family dnat hook"
+		return
+	fi
+
+	echo SERVER-$family | ip netns exec "$ns1" timeout 5 socat -u STDIN TCP-LISTEN:2000 &
+	sc_s=$!
+
+	sleep 1
+
+	result=$(ip netns exec "$ns0" timeout 1 socat TCP:$daddr:2000 STDOUT)
+
+	if [ "$result" = "SERVER-inet" ];then
+		echo "PASS: inet port rewrite without l3 address"
+	else
+		echo "ERROR: inet port rewrite"
+		ret=1
+	fi
+}
 
 test_masquerade6()
 {
@@ -741,6 +782,300 @@ EOF
 	return $lret
 }
 
+# test port shadowing.
+# create two listening services, one on router (ns0), one
+# on client (ns2), which is masqueraded from ns1 point of view.
+# ns2 sends udp packet coming from service port to ns1, on a highport.
+# Later, if n1 uses same highport to connect to ns0:service, packet
+# might be port-forwarded to ns2 instead.
+
+# second argument tells if we expect the 'fake-entry' to take effect
+# (CLIENT) or not (ROUTER).
+test_port_shadow()
+{
+	local test=$1
+	local expect=$2
+	local daddrc="10.0.1.99"
+	local daddrs="10.0.1.1"
+	local result=""
+	local logmsg=""
+
+	# make shadow entry, from client (ns2), going to (ns1), port 41404, sport 1405.
+	echo "fake-entry" | ip netns exec "$ns2" timeout 1 socat -u STDIN UDP:"$daddrc":41404,sourceport=1405
+
+	echo ROUTER | ip netns exec "$ns0" timeout 5 socat -u STDIN UDP4-LISTEN:1405 &
+	sc_r=$!
+
+	echo CLIENT | ip netns exec "$ns2" timeout 5 socat -u STDIN UDP4-LISTEN:1405,reuseport &
+	sc_c=$!
+
+	sleep 0.3
+
+	# ns1 tries to connect to ns0:1405.  With default settings this should connect
+	# to client, it matches the conntrack entry created above.
+
+	result=$(echo "data" | ip netns exec "$ns1" timeout 1 socat - UDP:"$daddrs":1405,sourceport=41404)
+
+	if [ "$result" = "$expect" ] ;then
+		echo "PASS: portshadow test $test: got reply from ${expect}${logmsg}"
+	else
+		echo "ERROR: portshadow test $test: got reply from \"$result\", not $expect as intended"
+		ret=1
+	fi
+
+	kill $sc_r $sc_c 2>/dev/null
+
+	# flush udp entries for next test round, if any
+	ip netns exec "$ns0" conntrack -F >/dev/null 2>&1
+}
+
+# This prevents port shadow of router service via packet filter,
+# packets claiming to originate from service port from internal
+# network are dropped.
+test_port_shadow_filter()
+{
+	local family=$1
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family filter {
+	chain forward {
+		type filter hook forward priority 0; policy accept;
+		meta iif veth1 udp sport 1405 drop
+	}
+}
+EOF
+	test_port_shadow "port-filter" "ROUTER"
+
+	ip netns exec "$ns0" nft delete table $family filter
+}
+
+# This prevents port shadow of router service via notrack.
+test_port_shadow_notrack()
+{
+	local family=$1
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family raw {
+	chain prerouting {
+		type filter hook prerouting priority -300; policy accept;
+		meta iif veth0 udp dport 1405 notrack
+	}
+	chain output {
+		type filter hook output priority -300; policy accept;
+		meta oif veth0 udp sport 1405 notrack
+	}
+}
+EOF
+	test_port_shadow "port-notrack" "ROUTER"
+
+	ip netns exec "$ns0" nft delete table $family raw
+}
+
+# This prevents port shadow of router service via sport remap.
+test_port_shadow_pat()
+{
+	local family=$1
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family pat {
+	chain postrouting {
+		type nat hook postrouting priority -1; policy accept;
+		meta iif veth1 udp sport <= 1405 masquerade to : 1406-65535 random
+	}
+}
+EOF
+	test_port_shadow "pat" "ROUTER"
+
+	ip netns exec "$ns0" nft delete table $family pat
+}
+
+test_port_shadowing()
+{
+	local family="ip"
+
+	conntrack -h >/dev/null 2>&1
+	if [ $? -ne 0 ];then
+		echo "SKIP: Could not run nat port shadowing test without conntrack tool"
+		return
+	fi
+
+	socat -h > /dev/null 2>&1
+	if [ $? -ne 0 ];then
+		echo "SKIP: Could not run nat port shadowing test without socat tool"
+		return
+	fi
+
+	ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+	ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+
+	ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family nat {
+	chain postrouting {
+		type nat hook postrouting priority 0; policy accept;
+		meta oif veth0 masquerade
+	}
+}
+EOF
+	if [ $? -ne 0 ]; then
+		echo "SKIP: Could not add add $family masquerade hook"
+		return $ksft_skip
+	fi
+
+	# test default behaviour. Packet from ns1 to ns0 is redirected to ns2.
+	test_port_shadow "default" "CLIENT"
+
+	# test packet filter based mitigation: prevent forwarding of
+	# packets claiming to come from the service port.
+	test_port_shadow_filter "$family"
+
+	# test conntrack based mitigation: connections going or coming
+	# from router:service bypass connection tracking.
+	test_port_shadow_notrack "$family"
+
+	# test nat based mitigation: fowarded packets coming from service port
+	# are masqueraded with random highport.
+	test_port_shadow_pat "$family"
+
+	ip netns exec "$ns0" nft delete table $family nat
+}
+
+test_stateless_nat_ip()
+{
+	local lret=0
+
+	ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+	ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+
+	ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
+	if [ $? -ne 0 ] ; then
+		echo "ERROR: cannot ping $ns1 from $ns2 before loading stateless rules"
+		return 1
+	fi
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table ip stateless {
+	map xlate_in {
+		typeof meta iifname . ip saddr . ip daddr : ip daddr
+		elements = {
+			"veth1" . 10.0.2.99 . 10.0.1.99 : 10.0.2.2,
+		}
+	}
+	map xlate_out {
+		typeof meta iifname . ip saddr . ip daddr : ip daddr
+		elements = {
+			"veth0" . 10.0.1.99 . 10.0.2.2 : 10.0.2.99
+		}
+	}
+
+	chain prerouting {
+		type filter hook prerouting priority -400; policy accept;
+		ip saddr set meta iifname . ip saddr . ip daddr map @xlate_in
+		ip daddr set meta iifname . ip saddr . ip daddr map @xlate_out
+	}
+}
+EOF
+	if [ $? -ne 0 ]; then
+		echo "SKIP: Could not add ip statless rules"
+		return $ksft_skip
+	fi
+
+	reset_counters
+
+	ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
+	if [ $? -ne 0 ] ; then
+		echo "ERROR: cannot ping $ns1 from $ns2 with stateless rules"
+		lret=1
+	fi
+
+	# ns1 should have seen packets from .2.2, due to stateless rewrite.
+	expect="packets 1 bytes 84"
+	cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0insl | grep -q "$expect")
+	if [ $? -ne 0 ]; then
+		bad_counter "$ns1" ns0insl "$expect" "test_stateless 1"
+		lret=1
+	fi
+
+	for dir in "in" "out" ; do
+		cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter "$ns2" ns1$dir "$expect" "test_stateless 2"
+			lret=1
+		fi
+	done
+
+	# ns1 should not have seen packets from ns2, due to masquerade
+	expect="packets 0 bytes 0"
+	for dir in "in" "out" ; do
+		cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter "$ns1" ns0$dir "$expect" "test_stateless 3"
+			lret=1
+		fi
+
+		cnt=$(ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter "$ns0" ns1$dir "$expect" "test_stateless 4"
+			lret=1
+		fi
+	done
+
+	reset_counters
+
+	socat -h > /dev/null 2>&1
+	if [ $? -ne 0 ];then
+		echo "SKIP: Could not run stateless nat frag test without socat tool"
+		if [ $lret -eq 0 ]; then
+			return $ksft_skip
+		fi
+
+		ip netns exec "$ns0" nft delete table ip stateless
+		return $lret
+	fi
+
+	local tmpfile=$(mktemp)
+	dd if=/dev/urandom of=$tmpfile bs=4096 count=1 2>/dev/null
+
+	local outfile=$(mktemp)
+	ip netns exec "$ns1" timeout 3 socat -u UDP4-RECV:4233 OPEN:$outfile < /dev/null &
+	sc_r=$!
+
+	sleep 1
+	# re-do with large ping -> ip fragmentation
+	ip netns exec "$ns2" timeout 3 socat - UDP4-SENDTO:"10.0.1.99:4233" < "$tmpfile" > /dev/null
+	if [ $? -ne 0 ] ; then
+		echo "ERROR: failed to test udp $ns1 to $ns2 with stateless ip nat" 1>&2
+		lret=1
+	fi
+
+	wait
+
+	cmp "$tmpfile" "$outfile"
+	if [ $? -ne 0 ]; then
+		ls -l "$tmpfile" "$outfile"
+		echo "ERROR: in and output file mismatch when checking udp with stateless nat" 1>&2
+		lret=1
+	fi
+
+	rm -f "$tmpfile" "$outfile"
+
+	# ns1 should have seen packets from 2.2, due to stateless rewrite.
+	expect="packets 3 bytes 4164"
+	cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0insl | grep -q "$expect")
+	if [ $? -ne 0 ]; then
+		bad_counter "$ns1" ns0insl "$expect" "test_stateless 5"
+		lret=1
+	fi
+
+	ip netns exec "$ns0" nft delete table ip stateless
+	if [ $? -ne 0 ]; then
+		echo "ERROR: Could not delete table ip stateless" 1>&2
+		lret=1
+	fi
+
+	test $lret -eq 0 && echo "PASS: IP statless for $ns2"
+
+	return $lret
+}
 
 # ip netns exec "$ns0" ping -c 1 -q 10.0.$i.99
 for i in 0 1 2; do
@@ -808,6 +1143,19 @@ table inet filter {
 EOF
 done
 
+# special case for stateless nat check, counter needs to
+# be done before (input) ip defragmentation
+ip netns exec ns1-$sfx nft -f /dev/stdin <<EOF
+table inet filter {
+	counter ns0insl {}
+
+	chain pre {
+		type filter hook prerouting priority -400; policy accept;
+		ip saddr 10.0.2.2 counter name "ns0insl"
+	}
+}
+EOF
+
 sleep 3
 # test basic connectivity
 for i in 1 2; do
@@ -841,6 +1189,10 @@ fi
 reset_counters
 test_local_dnat ip
 test_local_dnat6 ip6
+
+reset_counters
+test_local_dnat_portonly inet 10.0.1.99
+
 reset_counters
 $test_inet_nat && test_local_dnat inet
 $test_inet_nat && test_local_dnat6 inet
@@ -861,6 +1213,9 @@ reset_counters
 $test_inet_nat && test_redirect inet
 $test_inet_nat && test_redirect6 inet
 
+test_port_shadowing
+test_stateless_nat_ip
+
 if [ $ret -ne 0 ];then
 	echo -n "FAIL: "
 	nft --version
diff --git a/tools/testing/selftests/netfilter/nft_nat_zones.sh b/tools/testing/selftests/netfilter/nft_nat_zones.sh
new file mode 100755
index 000000000000..b9ab37380f33
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_nat_zones.sh
@@ -0,0 +1,309 @@
+#!/bin/bash
+#
+# Test connection tracking zone and NAT source port reallocation support.
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# Don't increase too much, 2000 clients should work
+# just fine but script can then take several minutes with
+# KASAN/debug builds.
+maxclients=100
+
+have_iperf=1
+ret=0
+
+# client1---.
+#            veth1-.
+#                  |
+#               NAT Gateway --veth0--> Server
+#                  | |
+#            veth2-' |
+# client2---'        |
+#  ....              |
+# clientX----vethX---'
+
+# All clients share identical IP address.
+# NAT Gateway uses policy routing and conntrack zones to isolate client
+# namespaces.  Each client connects to Server, each with colliding tuples:
+#   clientsaddr:10000 -> serveraddr:dport
+#   NAT Gateway is supposed to do port reallocation for each of the
+#   connections.
+
+sfx=$(mktemp -u "XXXXXXXX")
+gw="ns-gw-$sfx"
+cl1="ns-cl1-$sfx"
+cl2="ns-cl2-$sfx"
+srv="ns-srv-$sfx"
+
+v4gc1=$(sysctl -n net.ipv4.neigh.default.gc_thresh1 2>/dev/null)
+v4gc2=$(sysctl -n net.ipv4.neigh.default.gc_thresh2 2>/dev/null)
+v4gc3=$(sysctl -n net.ipv4.neigh.default.gc_thresh3 2>/dev/null)
+v6gc1=$(sysctl -n net.ipv6.neigh.default.gc_thresh1 2>/dev/null)
+v6gc2=$(sysctl -n net.ipv6.neigh.default.gc_thresh2 2>/dev/null)
+v6gc3=$(sysctl -n net.ipv6.neigh.default.gc_thresh3 2>/dev/null)
+
+cleanup()
+{
+	ip netns del $gw
+	ip netns del $srv
+	for i in $(seq 1 $maxclients); do
+		ip netns del ns-cl$i-$sfx 2>/dev/null
+	done
+
+	sysctl -q net.ipv4.neigh.default.gc_thresh1=$v4gc1 2>/dev/null
+	sysctl -q net.ipv4.neigh.default.gc_thresh2=$v4gc2 2>/dev/null
+	sysctl -q net.ipv4.neigh.default.gc_thresh3=$v4gc3 2>/dev/null
+	sysctl -q net.ipv6.neigh.default.gc_thresh1=$v6gc1 2>/dev/null
+	sysctl -q net.ipv6.neigh.default.gc_thresh2=$v6gc2 2>/dev/null
+	sysctl -q net.ipv6.neigh.default.gc_thresh3=$v6gc3 2>/dev/null
+}
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without nft tool"
+	exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+conntrack -V > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without conntrack tool"
+	exit $ksft_skip
+fi
+
+iperf3 -v >/dev/null 2>&1
+if [ $? -ne 0 ];then
+	have_iperf=0
+fi
+
+ip netns add "$gw"
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not create net namespace $gw"
+	exit $ksft_skip
+fi
+ip -net "$gw" link set lo up
+
+trap cleanup EXIT
+
+ip netns add "$srv"
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not create server netns $srv"
+	exit $ksft_skip
+fi
+
+ip link add veth0 netns "$gw" type veth peer name eth0 netns "$srv"
+ip -net "$gw" link set veth0 up
+ip -net "$srv" link set lo up
+ip -net "$srv" link set eth0 up
+
+sysctl -q net.ipv6.neigh.default.gc_thresh1=512  2>/dev/null
+sysctl -q net.ipv6.neigh.default.gc_thresh2=1024 2>/dev/null
+sysctl -q net.ipv6.neigh.default.gc_thresh3=4096 2>/dev/null
+sysctl -q net.ipv4.neigh.default.gc_thresh1=512  2>/dev/null
+sysctl -q net.ipv4.neigh.default.gc_thresh2=1024 2>/dev/null
+sysctl -q net.ipv4.neigh.default.gc_thresh3=4096 2>/dev/null
+
+for i in $(seq 1 $maxclients);do
+  cl="ns-cl$i-$sfx"
+
+  ip netns add "$cl"
+  if [ $? -ne 0 ];then
+     echo "SKIP: Could not create client netns $cl"
+     exit $ksft_skip
+  fi
+  ip link add veth$i netns "$gw" type veth peer name eth0 netns "$cl" > /dev/null 2>&1
+  if [ $? -ne 0 ];then
+    echo "SKIP: No virtual ethernet pair device support in kernel"
+    exit $ksft_skip
+  fi
+done
+
+for i in $(seq 1 $maxclients);do
+  cl="ns-cl$i-$sfx"
+  echo netns exec "$cl" ip link set lo up
+  echo netns exec "$cl" ip link set eth0 up
+  echo netns exec "$cl" sysctl -q net.ipv4.tcp_syn_retries=2
+  echo netns exec "$gw" ip link set veth$i up
+  echo netns exec "$gw" sysctl -q net.ipv4.conf.veth$i.arp_ignore=2
+  echo netns exec "$gw" sysctl -q net.ipv4.conf.veth$i.rp_filter=0
+
+  # clients have same IP addresses.
+  echo netns exec "$cl" ip addr add 10.1.0.3/24 dev eth0
+  echo netns exec "$cl" ip addr add dead:1::3/64 dev eth0
+  echo netns exec "$cl" ip route add default via 10.1.0.2 dev eth0
+  echo netns exec "$cl" ip route add default via dead:1::2 dev eth0
+
+  # NB: same addresses on client-facing interfaces.
+  echo netns exec "$gw" ip addr add 10.1.0.2/24 dev veth$i
+  echo netns exec "$gw" ip addr add dead:1::2/64 dev veth$i
+
+  # gw: policy routing
+  echo netns exec "$gw" ip route add 10.1.0.0/24 dev veth$i table $((1000+i))
+  echo netns exec "$gw" ip route add dead:1::0/64 dev veth$i table $((1000+i))
+  echo netns exec "$gw" ip route add 10.3.0.0/24 dev veth0 table $((1000+i))
+  echo netns exec "$gw" ip route add dead:3::0/64 dev veth0 table $((1000+i))
+  echo netns exec "$gw" ip rule add fwmark $i lookup $((1000+i))
+done | ip -batch /dev/stdin
+
+ip -net "$gw" addr add 10.3.0.1/24 dev veth0
+ip -net "$gw" addr add dead:3::1/64 dev veth0
+
+ip -net "$srv" addr add 10.3.0.99/24 dev eth0
+ip -net "$srv" addr add dead:3::99/64 dev eth0
+
+ip netns exec $gw nft -f /dev/stdin<<EOF
+table inet raw {
+	map iiftomark {
+		type ifname : mark
+	}
+
+	map iiftozone {
+		typeof iifname : ct zone
+	}
+
+	set inicmp {
+		flags dynamic
+		type ipv4_addr . ifname . ipv4_addr
+	}
+	set inflows {
+		flags dynamic
+		type ipv4_addr . inet_service . ifname . ipv4_addr . inet_service
+	}
+
+	set inflows6 {
+		flags dynamic
+		type ipv6_addr . inet_service . ifname . ipv6_addr . inet_service
+	}
+
+	chain prerouting {
+		type filter hook prerouting priority -64000; policy accept;
+		ct original zone set meta iifname map @iiftozone
+		meta mark set meta iifname map @iiftomark
+
+		tcp flags & (syn|ack) == ack add @inflows { ip saddr . tcp sport . meta iifname . ip daddr . tcp dport counter }
+		add @inflows6 { ip6 saddr . tcp sport . meta iifname . ip6 daddr . tcp dport counter }
+		ip protocol icmp add @inicmp { ip saddr . meta iifname . ip daddr counter }
+	}
+
+	chain nat_postrouting {
+		type nat hook postrouting priority 0; policy accept;
+                ct mark set meta mark meta oifname veth0 masquerade
+	}
+
+	chain mangle_prerouting {
+		type filter hook prerouting priority -100; policy accept;
+		ct direction reply meta mark set ct mark
+	}
+}
+EOF
+
+( echo add element inet raw iiftomark \{
+	for i in $(seq 1 $((maxclients-1))); do
+		echo \"veth$i\" : $i,
+	done
+	echo \"veth$maxclients\" : $maxclients \}
+	echo add element inet raw iiftozone \{
+	for i in $(seq 1 $((maxclients-1))); do
+		echo \"veth$i\" : $i,
+	done
+	echo \"veth$maxclients\" : $maxclients \}
+) | ip netns exec $gw nft -f /dev/stdin
+
+ip netns exec "$gw" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
+ip netns exec "$gw" sysctl -q net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec "$gw" sysctl -q net.ipv4.conf.all.rp_filter=0 >/dev/null
+
+# useful for debugging: allows to use 'ping' from clients to gateway.
+ip netns exec "$gw" sysctl -q net.ipv4.fwmark_reflect=1 > /dev/null
+ip netns exec "$gw" sysctl -q net.ipv6.fwmark_reflect=1 > /dev/null
+
+for i in $(seq 1 $maxclients); do
+  cl="ns-cl$i-$sfx"
+  ip netns exec $cl ping -i 0.5 -q -c 3 10.3.0.99 > /dev/null 2>&1 &
+  if [ $? -ne 0 ]; then
+     echo FAIL: Ping failure from $cl 1>&2
+     ret=1
+     break
+  fi
+done
+
+wait
+
+for i in $(seq 1 $maxclients); do
+   ip netns exec $gw nft get element inet raw inicmp "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }" | grep -q "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 counter packets 3 bytes 252 }"
+   if [ $? -ne 0 ];then
+      ret=1
+      echo "FAIL: counter icmp mismatch for veth$i" 1>&2
+      ip netns exec $gw nft get element inet raw inicmp "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }" 1>&2
+      break
+   fi
+done
+
+ip netns exec $gw nft get element inet raw inicmp "{ 10.3.0.99 . \"veth0\" . 10.3.0.1 }" | grep -q "{ 10.3.0.99 . \"veth0\" . 10.3.0.1 counter packets $((3 * $maxclients)) bytes $((252 * $maxclients)) }"
+if [ $? -ne 0 ];then
+    ret=1
+    echo "FAIL: counter icmp mismatch for veth0: { 10.3.0.99 . \"veth0\" . 10.3.0.1 counter packets $((3 * $maxclients)) bytes $((252 * $maxclients)) }"
+    ip netns exec $gw nft get element inet raw inicmp "{ 10.3.99 . \"veth0\" . 10.3.0.1 }" 1>&2
+fi
+
+if  [ $ret -eq 0 ]; then
+	echo "PASS: ping test from all $maxclients namespaces"
+fi
+
+if [ $have_iperf -eq 0 ];then
+	echo "SKIP: iperf3 not installed"
+	if [ $ret -ne 0 ];then
+	    exit $ret
+	fi
+	exit $ksft_skip
+fi
+
+ip netns exec $srv iperf3 -s > /dev/null 2>&1 &
+iperfpid=$!
+sleep 1
+
+for i in $(seq 1 $maxclients); do
+  if [ $ret -ne 0 ]; then
+     break
+  fi
+  cl="ns-cl$i-$sfx"
+  ip netns exec $cl iperf3 -c 10.3.0.99 --cport 10000 -n 1 > /dev/null
+  if [ $? -ne 0 ]; then
+     echo FAIL: Failure to connect for $cl 1>&2
+     ip netns exec $gw conntrack -S 1>&2
+     ret=1
+  fi
+done
+if [ $ret -eq 0 ];then
+	echo "PASS: iperf3 connections for all $maxclients net namespaces"
+fi
+
+kill $iperfpid
+wait
+
+for i in $(seq 1 $maxclients); do
+   ip netns exec $gw nft get element inet raw inflows "{ 10.1.0.3 . 10000 . \"veth$i\" . 10.3.0.99 . 5201 }" > /dev/null
+   if [ $? -ne 0 ];then
+      ret=1
+      echo "FAIL: can't find expected tcp entry for veth$i" 1>&2
+      break
+   fi
+done
+if [ $ret -eq 0 ];then
+	echo "PASS: Found client connection for all $maxclients net namespaces"
+fi
+
+ip netns exec $gw nft get element inet raw inflows "{ 10.3.0.99 . 5201 . \"veth0\" . 10.3.0.1 . 10000 }" > /dev/null
+if [ $? -ne 0 ];then
+    ret=1
+    echo "FAIL: cannot find return entry on veth0" 1>&2
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_queue.sh b/tools/testing/selftests/netfilter/nft_queue.sh
index 3d202b90b33d..e12729753351 100755
--- a/tools/testing/selftests/netfilter/nft_queue.sh
+++ b/tools/testing/selftests/netfilter/nft_queue.sh
@@ -16,6 +16,10 @@ timeout=4
 
 cleanup()
 {
+	ip netns pids ${ns1} | xargs kill 2>/dev/null
+	ip netns pids ${ns2} | xargs kill 2>/dev/null
+	ip netns pids ${nsrouter} | xargs kill 2>/dev/null
+
 	ip netns del ${ns1}
 	ip netns del ${ns2}
 	ip netns del ${nsrouter}
@@ -109,6 +113,7 @@ table inet $name {
 	chain output {
 		type filter hook output priority $prio; policy accept;
 		tcp dport 12345 queue num 3
+		tcp sport 23456 queue num 3
 		jump nfq
 	}
 	chain post {
@@ -292,6 +297,23 @@ test_tcp_localhost()
 	wait 2>/dev/null
 }
 
+test_tcp_localhost_connectclose()
+{
+	tmpfile=$(mktemp) || exit 1
+
+	ip netns exec ${nsrouter} ./connect_close -p 23456 -t $timeout &
+
+	ip netns exec ${nsrouter} ./nf-queue -q 3 -t $timeout &
+	local nfqpid=$!
+
+	sleep 1
+	rm -f "$tmpfile"
+
+	wait $rpid
+	[ $? -eq 0 ] && echo "PASS: tcp via loopback with connect/close"
+	wait 2>/dev/null
+}
+
 test_tcp_localhost_requeue()
 {
 ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
@@ -332,6 +354,55 @@ EOF
 	echo "PASS: tcp via loopback and re-queueing"
 }
 
+test_icmp_vrf() {
+	ip -net $ns1 link add tvrf type vrf table 9876
+	if [ $? -ne 0 ];then
+		echo "SKIP: Could not add vrf device"
+		return
+	fi
+
+	ip -net $ns1 li set eth0 master tvrf
+	ip -net $ns1 li set tvrf up
+
+	ip -net $ns1 route add 10.0.2.0/24 via 10.0.1.1 dev eth0 table 9876
+ip netns exec ${ns1} nft -f /dev/stdin <<EOF
+flush ruleset
+table inet filter {
+	chain output {
+		type filter hook output priority 0; policy accept;
+		meta oifname "tvrf" icmp type echo-request counter queue num 1
+		meta oifname "eth0" icmp type echo-request counter queue num 1
+	}
+	chain post {
+		type filter hook postrouting priority 0; policy accept;
+		meta oifname "tvrf" icmp type echo-request counter queue num 1
+		meta oifname "eth0" icmp type echo-request counter queue num 1
+	}
+}
+EOF
+	ip netns exec ${ns1} ./nf-queue -q 1 -t $timeout &
+	local nfqpid=$!
+
+	sleep 1
+	ip netns exec ${ns1} ip vrf exec tvrf ping -c 1 10.0.2.99 > /dev/null
+
+	for n in output post; do
+		for d in tvrf eth0; do
+			ip netns exec ${ns1} nft list chain inet filter $n | grep -q "oifname \"$d\" icmp type echo-request counter packets 1"
+			if [ $? -ne 0 ] ; then
+				echo "FAIL: chain $n: icmp packet counter mismatch for device $d" 1>&2
+				ip netns exec ${ns1} nft list ruleset
+				ret=1
+				return
+			fi
+		done
+	done
+
+	wait $nfqpid
+	[ $? -eq 0 ] && echo "PASS: icmp+nfqueue via vrf"
+	wait 2>/dev/null
+}
+
 ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
 ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
 ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
@@ -371,6 +442,8 @@ test_queue 20
 
 test_tcp_forward
 test_tcp_localhost
+test_tcp_localhost_connectclose
 test_tcp_localhost_requeue
+test_icmp_vrf
 
 exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_synproxy.sh b/tools/testing/selftests/netfilter/nft_synproxy.sh
new file mode 100755
index 000000000000..b62933b680d6
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_synproxy.sh
@@ -0,0 +1,117 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+
+rnd=$(mktemp -u XXXXXXXX)
+nsr="nsr-$rnd"	# synproxy machine
+ns1="ns1-$rnd"  # iperf client
+ns2="ns2-$rnd"  # iperf server
+
+checktool (){
+	if ! $1 > /dev/null 2>&1; then
+		echo "SKIP: Could not $2"
+		exit $ksft_skip
+	fi
+}
+
+checktool "nft --version" "run test without nft tool"
+checktool "ip -Version" "run test without ip tool"
+checktool "iperf3 --version" "run test without iperf3"
+checktool "ip netns add $nsr" "create net namespace"
+
+modprobe -q nf_conntrack
+
+ip netns add $ns1
+ip netns add $ns2
+
+cleanup() {
+	ip netns pids $ns1 | xargs kill 2>/dev/null
+	ip netns pids $ns2 | xargs kill 2>/dev/null
+	ip netns del $ns1
+	ip netns del $ns2
+
+	ip netns del $nsr
+}
+
+trap cleanup EXIT
+
+ip link add veth0 netns $nsr type veth peer name eth0 netns $ns1
+ip link add veth1 netns $nsr type veth peer name eth0 netns $ns2
+
+for dev in lo veth0 veth1; do
+ip -net $nsr link set $dev up
+done
+
+ip -net $nsr addr add 10.0.1.1/24 dev veth0
+ip -net $nsr addr add 10.0.2.1/24 dev veth1
+
+ip netns exec $nsr sysctl -q net.ipv4.conf.veth0.forwarding=1
+ip netns exec $nsr sysctl -q net.ipv4.conf.veth1.forwarding=1
+ip netns exec $nsr sysctl -q net.netfilter.nf_conntrack_tcp_loose=0
+
+for n in $ns1 $ns2; do
+  ip -net $n link set lo up
+  ip -net $n link set eth0 up
+done
+ip -net $ns1 addr add 10.0.1.99/24 dev eth0
+ip -net $ns2 addr add 10.0.2.99/24 dev eth0
+ip -net $ns1 route add default via 10.0.1.1
+ip -net $ns2 route add default via 10.0.2.1
+
+# test basic connectivity
+if ! ip netns exec $ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then
+  echo "ERROR: $ns1 cannot reach $ns2" 1>&2
+  exit 1
+fi
+
+if ! ip netns exec $ns2 ping -c 1 -q 10.0.1.99 > /dev/null; then
+  echo "ERROR: $ns2 cannot reach $ns1" 1>&2
+  exit 1
+fi
+
+ip netns exec $ns2 iperf3 -s > /dev/null 2>&1 &
+# ip netns exec $nsr tcpdump -vvv -n -i veth1 tcp | head -n 10 &
+
+sleep 1
+
+ip netns exec $nsr nft -f - <<EOF
+table inet filter {
+   chain prerouting {
+      type filter hook prerouting priority -300; policy accept;
+      meta iif veth0 tcp flags syn counter notrack
+   }
+
+  chain forward {
+      type filter hook forward priority 0; policy accept;
+
+      ct state new,established counter accept
+
+      meta iif veth0 meta l4proto tcp ct state untracked,invalid synproxy mss 1460 sack-perm timestamp
+
+      ct state invalid counter drop
+
+      # make ns2 unreachable w.o. tcp synproxy
+      tcp flags syn counter drop
+   }
+}
+EOF
+if [ $? -ne 0 ]; then
+	echo "SKIP: Cannot add nft synproxy"
+	exit $ksft_skip
+fi
+
+ip netns exec $ns1 timeout 5 iperf3 -c 10.0.2.99 -n $((1 * 1024 * 1024)) > /dev/null
+
+if [ $? -ne 0 ]; then
+	echo "FAIL: iperf3 returned an error" 1>&2
+	ret=$?
+	ip netns exec $nsr nft list ruleset
+else
+	echo "PASS: synproxy connection successful"
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_trans_stress.sh b/tools/testing/selftests/netfilter/nft_trans_stress.sh
index f1affd12c4b1..2ffba45a78bf 100755
--- a/tools/testing/selftests/netfilter/nft_trans_stress.sh
+++ b/tools/testing/selftests/netfilter/nft_trans_stress.sh
@@ -9,8 +9,35 @@
 # Kselftest framework requirement - SKIP code is 4.
 ksft_skip=4
 
-testns=testns1
+testns=testns-$(mktemp -u "XXXXXXXX")
+tmp=""
+
 tables="foo bar baz quux"
+global_ret=0
+eret=0
+lret=0
+
+cleanup() {
+	ip netns pids "$testns" | xargs kill 2>/dev/null
+	ip netns del "$testns"
+
+	rm -f "$tmp"
+}
+
+check_result()
+{
+	local r=$1
+	local OK="PASS"
+
+	if [ $r -ne 0 ] ;then
+		OK="FAIL"
+		global_ret=$r
+	fi
+
+	echo "$OK: nft $2 test returned $r"
+
+	eret=0
+}
 
 nft --version > /dev/null 2>&1
 if [ $? -ne 0 ];then
@@ -24,6 +51,7 @@ if [ $? -ne 0 ];then
 	exit $ksft_skip
 fi
 
+trap cleanup EXIT
 tmp=$(mktemp)
 
 for table in $tables; do
@@ -59,20 +87,65 @@ done)
 
 sleep 1
 
+ip netns exec "$testns" nft -f "$tmp"
 for i in $(seq 1 10) ; do ip netns exec "$testns" nft -f "$tmp" & done
 
 for table in $tables;do
-	randsleep=$((RANDOM%10))
+	randsleep=$((RANDOM%2))
 	sleep $randsleep
-	ip netns exec "$testns" nft delete table inet $table 2>/dev/null
+	ip netns exec "$testns" nft delete table inet $table
+	lret=$?
+	if [ $lret -ne 0 ]; then
+		eret=$lret
+	fi
+done
+
+check_result $eret "add/delete"
+
+for i in $(seq 1 10) ; do
+	(echo "flush ruleset"; cat "$tmp") | ip netns exec "$testns" nft -f /dev/stdin
+
+	lret=$?
+	if [ $lret -ne 0 ]; then
+		eret=$lret
+	fi
+done
+
+check_result $eret "reload"
+
+for i in $(seq 1 10) ; do
+	(echo "flush ruleset"; cat "$tmp"
+	 echo "insert rule inet foo INPUT meta nftrace set 1"
+	 echo "insert rule inet foo OUTPUT meta nftrace set 1"
+	 ) | ip netns exec "$testns" nft -f /dev/stdin
+	lret=$?
+	if [ $lret -ne 0 ]; then
+		eret=$lret
+	fi
+
+	(echo "flush ruleset"; cat "$tmp"
+	 ) | ip netns exec "$testns" nft -f /dev/stdin
+
+	lret=$?
+	if [ $lret -ne 0 ]; then
+		eret=$lret
+	fi
 done
 
-randsleep=$((RANDOM%10))
-sleep $randsleep
+check_result $eret "add/delete with nftrace enabled"
+
+echo "insert rule inet foo INPUT meta nftrace set 1" >> $tmp
+echo "insert rule inet foo OUTPUT meta nftrace set 1" >> $tmp
 
-pkill -9 ping
+for i in $(seq 1 10) ; do
+	(echo "flush ruleset"; cat "$tmp") | ip netns exec "$testns" nft -f /dev/stdin
+
+	lret=$?
+	if [ $lret -ne 0 ]; then
+		eret=1
+	fi
+done
 
-wait
+check_result $lret "add/delete with nftrace enabled"
 
-rm -f "$tmp"
-ip netns del "$testns"
+exit $global_ret
diff --git a/tools/testing/selftests/netfilter/nft_zones_many.sh b/tools/testing/selftests/netfilter/nft_zones_many.sh
new file mode 100755
index 000000000000..5a8db0b48928
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_zones_many.sh
@@ -0,0 +1,163 @@
+#!/bin/bash
+
+# Test insertion speed for packets with identical addresses/ports
+# that are all placed in distinct conntrack zones.
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns="ns-$sfx"
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+zones=2000
+have_ct_tool=0
+ret=0
+
+cleanup()
+{
+	ip netns del $ns
+}
+
+checktool (){
+	if ! $1 > /dev/null 2>&1; then
+		echo "SKIP: Could not $2"
+		exit $ksft_skip
+	fi
+}
+
+checktool "nft --version" "run test without nft tool"
+checktool "ip -Version" "run test without ip tool"
+checktool "socat -V" "run test without socat tool"
+checktool "ip netns add $ns" "create net namespace"
+
+trap cleanup EXIT
+
+conntrack -V > /dev/null 2>&1
+if [ $? -eq 0 ];then
+	have_ct_tool=1
+fi
+
+ip -net "$ns" link set lo up
+
+test_zones() {
+	local max_zones=$1
+
+ip netns exec $ns sysctl -q net.netfilter.nf_conntrack_udp_timeout=3600
+ip netns exec $ns nft -f /dev/stdin<<EOF
+flush ruleset
+table inet raw {
+	map rndzone {
+		typeof numgen inc mod $max_zones : ct zone
+	}
+
+	chain output {
+		type filter hook output priority -64000; policy accept;
+		udp dport 12345  ct zone set numgen inc mod 65536 map @rndzone
+	}
+}
+EOF
+	(
+		echo "add element inet raw rndzone {"
+	for i in $(seq 1 $max_zones);do
+		echo -n "$i : $i"
+		if [ $i -lt $max_zones ]; then
+			echo ","
+		else
+			echo "}"
+		fi
+	done
+	) | ip netns exec $ns nft -f /dev/stdin
+
+	local i=0
+	local j=0
+	local outerstart=$(date +%s%3N)
+	local stop=$outerstart
+
+	while [ $i -lt $max_zones ]; do
+		local start=$(date +%s%3N)
+		i=$((i + 1000))
+		j=$((j + 1))
+		# nft rule in output places each packet in a different zone.
+		dd if=/dev/zero of=/dev/stdout bs=8k count=1000 2>/dev/null | ip netns exec "$ns" socat STDIN UDP:127.0.0.1:12345,sourceport=12345
+		if [ $? -ne 0 ] ;then
+			ret=1
+			break
+		fi
+
+		stop=$(date +%s%3N)
+		local duration=$((stop-start))
+		echo "PASS: added 1000 entries in $duration ms (now $i total, loop $j)"
+	done
+
+	if [ $have_ct_tool -eq 1 ]; then
+		local count=$(ip netns exec "$ns" conntrack -C)
+		local duration=$((stop-outerstart))
+
+		if [ $count -eq $max_zones ]; then
+			echo "PASS: inserted $count entries from packet path in $duration ms total"
+		else
+			ip netns exec $ns conntrack -S 1>&2
+			echo "FAIL: inserted $count entries from packet path in $duration ms total, expected $max_zones entries"
+			ret=1
+		fi
+	fi
+
+	if [ $ret -ne 0 ];then
+		echo "FAIL: insert $max_zones entries from packet path" 1>&2
+	fi
+}
+
+test_conntrack_tool() {
+	local max_zones=$1
+
+	ip netns exec $ns conntrack -F >/dev/null 2>/dev/null
+
+	local outerstart=$(date +%s%3N)
+	local start=$(date +%s%3N)
+	local stop=$start
+	local i=0
+	while [ $i -lt $max_zones ]; do
+		i=$((i + 1))
+		ip netns exec "$ns" conntrack -I -s 1.1.1.1 -d 2.2.2.2 --protonum 6 \
+	                 --timeout 3600 --state ESTABLISHED --sport 12345 --dport 1000 --zone $i >/dev/null 2>&1
+		if [ $? -ne 0 ];then
+			ip netns exec "$ns" conntrack -I -s 1.1.1.1 -d 2.2.2.2 --protonum 6 \
+	                 --timeout 3600 --state ESTABLISHED --sport 12345 --dport 1000 --zone $i > /dev/null
+			echo "FAIL: conntrack -I returned an error"
+			ret=1
+			break
+		fi
+
+		if [ $((i%1000)) -eq 0 ];then
+			stop=$(date +%s%3N)
+
+			local duration=$((stop-start))
+			echo "PASS: added 1000 entries in $duration ms (now $i total)"
+			start=$stop
+		fi
+	done
+
+	local count=$(ip netns exec "$ns" conntrack -C)
+	local duration=$((stop-outerstart))
+
+	if [ $count -eq $max_zones ]; then
+		echo "PASS: inserted $count entries via ctnetlink in $duration ms"
+	else
+		ip netns exec $ns conntrack -S 1>&2
+		echo "FAIL: inserted $count entries via ctnetlink in $duration ms, expected $max_zones entries ($duration ms)"
+		ret=1
+	fi
+}
+
+test_zones $zones
+
+if [ $have_ct_tool -eq 1 ];then
+	test_conntrack_tool $zones
+else
+	echo "SKIP: Could not run ctnetlink insertion test without conntrack tool"
+	if [ $ret -eq 0 ];then
+		exit $ksft_skip
+	fi
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/rpath.sh b/tools/testing/selftests/netfilter/rpath.sh
new file mode 100755
index 000000000000..5289c8447a41
--- /dev/null
+++ b/tools/testing/selftests/netfilter/rpath.sh
@@ -0,0 +1,169 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# return code to signal skipped test
+ksft_skip=4
+
+# search for legacy iptables (it uses the xtables extensions
+if iptables-legacy --version >/dev/null 2>&1; then
+	iptables='iptables-legacy'
+elif iptables --version >/dev/null 2>&1; then
+	iptables='iptables'
+else
+	iptables=''
+fi
+
+if ip6tables-legacy --version >/dev/null 2>&1; then
+	ip6tables='ip6tables-legacy'
+elif ip6tables --version >/dev/null 2>&1; then
+	ip6tables='ip6tables'
+else
+	ip6tables=''
+fi
+
+if nft --version >/dev/null 2>&1; then
+	nft='nft'
+else
+	nft=''
+fi
+
+if [ -z "$iptables$ip6tables$nft" ]; then
+	echo "SKIP: Test needs iptables, ip6tables or nft"
+	exit $ksft_skip
+fi
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns1="ns1-$sfx"
+ns2="ns2-$sfx"
+trap "ip netns del $ns1; ip netns del $ns2" EXIT
+
+# create two netns, disable rp_filter in ns2 and
+# keep IPv6 address when moving into VRF
+ip netns add "$ns1"
+ip netns add "$ns2"
+ip netns exec "$ns2" sysctl -q net.ipv4.conf.all.rp_filter=0
+ip netns exec "$ns2" sysctl -q net.ipv4.conf.default.rp_filter=0
+ip netns exec "$ns2" sysctl -q net.ipv6.conf.all.keep_addr_on_down=1
+
+# a standard connection between the netns, should not trigger rp filter
+ip -net "$ns1" link add v0 type veth peer name v0 netns "$ns2"
+ip -net "$ns1" link set v0 up; ip -net "$ns2" link set v0 up
+ip -net "$ns1" a a 192.168.23.2/24 dev v0
+ip -net "$ns2" a a 192.168.23.1/24 dev v0
+ip -net "$ns1" a a fec0:23::2/64 dev v0 nodad
+ip -net "$ns2" a a fec0:23::1/64 dev v0 nodad
+
+# rp filter testing: ns1 sends packets via v0 which ns2 would route back via d0
+ip -net "$ns2" link add d0 type dummy
+ip -net "$ns2" link set d0 up
+ip -net "$ns1" a a 192.168.42.2/24 dev v0
+ip -net "$ns2" a a 192.168.42.1/24 dev d0
+ip -net "$ns1" a a fec0:42::2/64 dev v0 nodad
+ip -net "$ns2" a a fec0:42::1/64 dev d0 nodad
+
+# firewall matches to test
+[ -n "$iptables" ] && {
+	common='-t raw -A PREROUTING -s 192.168.0.0/16'
+	ip netns exec "$ns2" "$iptables" $common -m rpfilter
+	ip netns exec "$ns2" "$iptables" $common -m rpfilter --invert
+}
+[ -n "$ip6tables" ] && {
+	common='-t raw -A PREROUTING -s fec0::/16'
+	ip netns exec "$ns2" "$ip6tables" $common -m rpfilter
+	ip netns exec "$ns2" "$ip6tables" $common -m rpfilter --invert
+}
+[ -n "$nft" ] && ip netns exec "$ns2" $nft -f - <<EOF
+table inet t {
+	chain c {
+		type filter hook prerouting priority raw;
+		ip saddr 192.168.0.0/16 fib saddr . iif oif exists counter
+		ip6 saddr fec0::/16 fib saddr . iif oif exists counter
+	}
+}
+EOF
+
+die() {
+	echo "FAIL: $*"
+	#ip netns exec "$ns2" "$iptables" -t raw -vS
+	#ip netns exec "$ns2" "$ip6tables" -t raw -vS
+	#ip netns exec "$ns2" nft list ruleset
+	exit 1
+}
+
+# check rule counters, return true if rule did not match
+ipt_zero_rule() { # (command)
+	[ -n "$1" ] || return 0
+	ip netns exec "$ns2" "$1" -t raw -vS | grep -q -- "-m rpfilter -c 0 0"
+}
+ipt_zero_reverse_rule() { # (command)
+	[ -n "$1" ] || return 0
+	ip netns exec "$ns2" "$1" -t raw -vS | \
+		grep -q -- "-m rpfilter --invert -c 0 0"
+}
+nft_zero_rule() { # (family)
+	[ -n "$nft" ] || return 0
+	ip netns exec "$ns2" "$nft" list chain inet t c | \
+		grep -q "$1 saddr .* counter packets 0 bytes 0"
+}
+
+netns_ping() { # (netns, args...)
+	local netns="$1"
+	shift
+	ip netns exec "$netns" ping -q -c 1 -W 1 "$@" >/dev/null
+}
+
+clear_counters() {
+	[ -n "$iptables" ] && ip netns exec "$ns2" "$iptables" -t raw -Z
+	[ -n "$ip6tables" ] && ip netns exec "$ns2" "$ip6tables" -t raw -Z
+	if [ -n "$nft" ]; then
+		(
+			echo "delete table inet t";
+			ip netns exec "$ns2" $nft -s list table inet t;
+		) | ip netns exec "$ns2" $nft -f -
+	fi
+}
+
+testrun() {
+	clear_counters
+
+	# test 1: martian traffic should fail rpfilter matches
+	netns_ping "$ns1" -I v0 192.168.42.1 && \
+		die "martian ping 192.168.42.1 succeeded"
+	netns_ping "$ns1" -I v0 fec0:42::1 && \
+		die "martian ping fec0:42::1 succeeded"
+
+	ipt_zero_rule "$iptables" || die "iptables matched martian"
+	ipt_zero_rule "$ip6tables" || die "ip6tables matched martian"
+	ipt_zero_reverse_rule "$iptables" && die "iptables not matched martian"
+	ipt_zero_reverse_rule "$ip6tables" && die "ip6tables not matched martian"
+	nft_zero_rule ip || die "nft IPv4 matched martian"
+	nft_zero_rule ip6 || die "nft IPv6 matched martian"
+
+	clear_counters
+
+	# test 2: rpfilter match should pass for regular traffic
+	netns_ping "$ns1" 192.168.23.1 || \
+		die "regular ping 192.168.23.1 failed"
+	netns_ping "$ns1" fec0:23::1 || \
+		die "regular ping fec0:23::1 failed"
+
+	ipt_zero_rule "$iptables" && die "iptables match not effective"
+	ipt_zero_rule "$ip6tables" && die "ip6tables match not effective"
+	ipt_zero_reverse_rule "$iptables" || die "iptables match over-effective"
+	ipt_zero_reverse_rule "$ip6tables" || die "ip6tables match over-effective"
+	nft_zero_rule ip && die "nft IPv4 match not effective"
+	nft_zero_rule ip6 && die "nft IPv6 match not effective"
+
+}
+
+testrun
+
+# repeat test with vrf device in $ns2
+ip -net "$ns2" link add vrf0 type vrf table 10
+ip -net "$ns2" link set vrf0 up
+ip -net "$ns2" link set v0 master vrf0
+
+testrun
+
+echo "PASS: netfilter reverse path match works as intended"
+exit 0
diff --git a/tools/testing/selftests/netfilter/sctp_collision.c b/tools/testing/selftests/netfilter/sctp_collision.c
new file mode 100644
index 000000000000..21bb1cfd8a85
--- /dev/null
+++ b/tools/testing/selftests/netfilter/sctp_collision.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+
+int main(int argc, char *argv[])
+{
+	struct sockaddr_in saddr = {}, daddr = {};
+	int sd, ret, len = sizeof(daddr);
+	struct timeval tv = {25, 0};
+	char buf[] = "hello";
+
+	if (argc != 6 || (strcmp(argv[1], "server") && strcmp(argv[1], "client"))) {
+		printf("%s <server|client> <LOCAL_IP> <LOCAL_PORT> <REMOTE_IP> <REMOTE_PORT>\n",
+		       argv[0]);
+		return -1;
+	}
+
+	sd = socket(AF_INET, SOCK_SEQPACKET, IPPROTO_SCTP);
+	if (sd < 0) {
+		printf("Failed to create sd\n");
+		return -1;
+	}
+
+	saddr.sin_family = AF_INET;
+	saddr.sin_addr.s_addr = inet_addr(argv[2]);
+	saddr.sin_port = htons(atoi(argv[3]));
+
+	ret = bind(sd, (struct sockaddr *)&saddr, sizeof(saddr));
+	if (ret < 0) {
+		printf("Failed to bind to address\n");
+		goto out;
+	}
+
+	ret = listen(sd, 5);
+	if (ret < 0) {
+		printf("Failed to listen on port\n");
+		goto out;
+	}
+
+	daddr.sin_family = AF_INET;
+	daddr.sin_addr.s_addr = inet_addr(argv[4]);
+	daddr.sin_port = htons(atoi(argv[5]));
+
+	/* make test shorter than 25s */
+	ret = setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
+	if (ret < 0) {
+		printf("Failed to setsockopt SO_RCVTIMEO\n");
+		goto out;
+	}
+
+	if (!strcmp(argv[1], "server")) {
+		sleep(1); /* wait a bit for client's INIT */
+		ret = connect(sd, (struct sockaddr *)&daddr, len);
+		if (ret < 0) {
+			printf("Failed to connect to peer\n");
+			goto out;
+		}
+		ret = recvfrom(sd, buf, sizeof(buf), 0, (struct sockaddr *)&daddr, &len);
+		if (ret < 0) {
+			printf("Failed to recv msg %d\n", ret);
+			goto out;
+		}
+		ret = sendto(sd, buf, strlen(buf) + 1, 0, (struct sockaddr *)&daddr, len);
+		if (ret < 0) {
+			printf("Failed to send msg %d\n", ret);
+			goto out;
+		}
+		printf("Server: sent! %d\n", ret);
+	}
+
+	if (!strcmp(argv[1], "client")) {
+		usleep(300000); /* wait a bit for server's listening */
+		ret = connect(sd, (struct sockaddr *)&daddr, len);
+		if (ret < 0) {
+			printf("Failed to connect to peer\n");
+			goto out;
+		}
+		sleep(1); /* wait a bit for server's delayed INIT_ACK to reproduce the issue */
+		ret = sendto(sd, buf, strlen(buf) + 1, 0, (struct sockaddr *)&daddr, len);
+		if (ret < 0) {
+			printf("Failed to send msg %d\n", ret);
+			goto out;
+		}
+		ret = recvfrom(sd, buf, sizeof(buf), 0, (struct sockaddr *)&daddr, &len);
+		if (ret < 0) {
+			printf("Failed to recv msg %d\n", ret);
+			goto out;
+		}
+		printf("Client: rcvd! %d\n", ret);
+	}
+	ret = 0;
+out:
+	close(sd);
+	return ret;
+}
diff --git a/tools/testing/selftests/netfilter/settings b/tools/testing/selftests/netfilter/settings
new file mode 100644
index 000000000000..6091b45d226b
--- /dev/null
+++ b/tools/testing/selftests/netfilter/settings
@@ -0,0 +1 @@
+timeout=120
diff --git a/tools/testing/selftests/netfilter/xt_string.sh b/tools/testing/selftests/netfilter/xt_string.sh
new file mode 100755
index 000000000000..1802653a4728
--- /dev/null
+++ b/tools/testing/selftests/netfilter/xt_string.sh
@@ -0,0 +1,128 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# return code to signal skipped test
+ksft_skip=4
+rc=0
+
+if ! iptables --version >/dev/null 2>&1; then
+	echo "SKIP: Test needs iptables"
+	exit $ksft_skip
+fi
+if ! ip -V >/dev/null 2>&1; then
+	echo "SKIP: Test needs iproute2"
+	exit $ksft_skip
+fi
+if ! nc -h >/dev/null 2>&1; then
+	echo "SKIP: Test needs netcat"
+	exit $ksft_skip
+fi
+
+pattern="foo bar baz"
+patlen=11
+hdrlen=$((20 + 8)) # IPv4 + UDP
+ns="ns-$(mktemp -u XXXXXXXX)"
+trap 'ip netns del $ns' EXIT
+ip netns add "$ns"
+ip -net "$ns" link add d0 type dummy
+ip -net "$ns" link set d0 up
+ip -net "$ns" addr add 10.1.2.1/24 dev d0
+
+#ip netns exec "$ns" tcpdump -npXi d0 &
+#tcpdump_pid=$!
+#trap 'kill $tcpdump_pid; ip netns del $ns' EXIT
+
+add_rule() { # (alg, from, to)
+	ip netns exec "$ns" \
+		iptables -A OUTPUT -o d0 -m string \
+			--string "$pattern" --algo $1 --from $2 --to $3
+}
+showrules() { # ()
+	ip netns exec "$ns" iptables -v -S OUTPUT | grep '^-A'
+}
+zerorules() {
+	ip netns exec "$ns" iptables -Z OUTPUT
+}
+countrule() { # (pattern)
+	showrules | grep -c -- "$*"
+}
+send() { # (offset)
+	( for ((i = 0; i < $1 - $hdrlen; i++)); do
+		printf " "
+	  done
+	  printf "$pattern"
+	) | ip netns exec "$ns" nc -w 1 -u 10.1.2.2 27374
+}
+
+add_rule bm 1000 1500
+add_rule bm 1400 1600
+add_rule kmp 1000 1500
+add_rule kmp 1400 1600
+
+zerorules
+send 0
+send $((1000 - $patlen))
+if [ $(countrule -c 0 0) -ne 4 ]; then
+	echo "FAIL: rules match data before --from"
+	showrules
+	((rc--))
+fi
+
+zerorules
+send 1000
+send $((1400 - $patlen))
+if [ $(countrule -c 2) -ne 2 ]; then
+	echo "FAIL: only two rules should match at low offset"
+	showrules
+	((rc--))
+fi
+
+zerorules
+send $((1500 - $patlen))
+if [ $(countrule -c 1) -ne 4 ]; then
+	echo "FAIL: all rules should match at end of packet"
+	showrules
+	((rc--))
+fi
+
+zerorules
+send 1495
+if [ $(countrule -c 1) -ne 1 ]; then
+	echo "FAIL: only kmp with proper --to should match pattern spanning fragments"
+	showrules
+	((rc--))
+fi
+
+zerorules
+send 1500
+if [ $(countrule -c 1) -ne 2 ]; then
+	echo "FAIL: two rules should match pattern at start of second fragment"
+	showrules
+	((rc--))
+fi
+
+zerorules
+send $((1600 - $patlen))
+if [ $(countrule -c 1) -ne 2 ]; then
+	echo "FAIL: two rules should match pattern at end of largest --to"
+	showrules
+	((rc--))
+fi
+
+zerorules
+send $((1600 - $patlen + 1))
+if [ $(countrule -c 1) -ne 0 ]; then
+	echo "FAIL: no rules should match pattern extending largest --to"
+	showrules
+	((rc--))
+fi
+
+zerorules
+send 1600
+if [ $(countrule -c 1) -ne 0 ]; then
+	echo "FAIL: no rule should match pattern past largest --to"
+	showrules
+	((rc--))
+fi
+
+exit $rc