32 files changed, 4826 insertions, 83 deletions
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 79c9eb0034d5..492b273743b4 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -25,6 +25,9 @@ TEST_PROGS += bareudp.sh
 TEST_PROGS += unicast_extensions.sh
 TEST_PROGS += udpgro_fwd.sh
 TEST_PROGS += veth.sh
+TEST_PROGS += ioam6.sh
+TEST_PROGS += gro.sh
+TEST_PROGS += gre_gso.sh
 TEST_PROGS_EXTENDED := in_netns.sh
 TEST_GEN_FILES =  socket nettest
 TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
@@ -36,8 +39,11 @@ TEST_GEN_FILES += fin_ack_lat
 TEST_GEN_FILES += reuseaddr_ports_exhausted
 TEST_GEN_FILES += hwtstamp_config rxtimestamp timestamping txtimestamp
 TEST_GEN_FILES += ipsec
+TEST_GEN_FILES += ioam6_parser
+TEST_GEN_FILES += gro
 TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
 TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
+TEST_GEN_FILES += toeplitz
 
 TEST_FILES := settings
 
diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile
new file mode 100644
index 000000000000..cfc7f4f97fd1
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/Makefile
@@ -0,0 +1,5 @@
+##TEST_GEN_FILES := test_unix_oob
+TEST_PROGS := test_unix_oob
+include ../../lib.mk
+
+all: $(TEST_PROGS)
diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c
new file mode 100644
index 000000000000..0f3e3763f4f8
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/test_unix_oob.c
@@ -0,0 +1,437 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+#include <netinet/tcp.h>
+#include <sys/un.h>
+#include <sys/signal.h>
+#include <sys/poll.h>
+
+static int pipefd[2];
+static int signal_recvd;
+static pid_t producer_id;
+static char sock_name[32];
+
+static void sig_hand(int sn, siginfo_t *si, void *p)
+{
+	signal_recvd = sn;
+}
+
+static int set_sig_handler(int signal)
+{
+	struct sigaction sa;
+
+	sa.sa_sigaction = sig_hand;
+	sigemptyset(&sa.sa_mask);
+	sa.sa_flags = SA_SIGINFO | SA_RESTART;
+
+	return sigaction(signal, &sa, NULL);
+}
+
+static void set_filemode(int fd, int set)
+{
+	int flags = fcntl(fd, F_GETFL, 0);
+
+	if (set)
+		flags &= ~O_NONBLOCK;
+	else
+		flags |= O_NONBLOCK;
+	fcntl(fd, F_SETFL, flags);
+}
+
+static void signal_producer(int fd)
+{
+	char cmd;
+
+	cmd = 'S';
+	write(fd, &cmd, sizeof(cmd));
+}
+
+static void wait_for_signal(int fd)
+{
+	char buf[5];
+
+	read(fd, buf, 5);
+}
+
+static void die(int status)
+{
+	fflush(NULL);
+	unlink(sock_name);
+	kill(producer_id, SIGTERM);
+	exit(status);
+}
+
+int is_sioctatmark(int fd)
+{
+	int ans = -1;
+
+	if (ioctl(fd, SIOCATMARK, &ans, sizeof(ans)) < 0) {
+#ifdef DEBUG
+		perror("SIOCATMARK Failed");
+#endif
+	}
+	return ans;
+}
+
+void read_oob(int fd, char *c)
+{
+
+	*c = ' ';
+	if (recv(fd, c, sizeof(*c), MSG_OOB) < 0) {
+#ifdef DEBUG
+		perror("Reading MSG_OOB Failed");
+#endif
+	}
+}
+
+int read_data(int pfd, char *buf, int size)
+{
+	int len = 0;
+
+	memset(buf, size, '0');
+	len = read(pfd, buf, size);
+#ifdef DEBUG
+	if (len < 0)
+		perror("read failed");
+#endif
+	return len;
+}
+
+static void wait_for_data(int pfd, int event)
+{
+	struct pollfd pfds[1];
+
+	pfds[0].fd = pfd;
+	pfds[0].events = event;
+	poll(pfds, 1, -1);
+}
+
+void producer(struct sockaddr_un *consumer_addr)
+{
+	int cfd;
+	char buf[64];
+	int i;
+
+	memset(buf, 'x', sizeof(buf));
+	cfd = socket(AF_UNIX, SOCK_STREAM, 0);
+
+	wait_for_signal(pipefd[0]);
+	if (connect(cfd, (struct sockaddr *)consumer_addr,
+		     sizeof(struct sockaddr)) != 0) {
+		perror("Connect failed");
+		kill(0, SIGTERM);
+		exit(1);
+	}
+
+	for (i = 0; i < 2; i++) {
+		/* Test 1: Test for SIGURG and OOB */
+		wait_for_signal(pipefd[0]);
+		memset(buf, 'x', sizeof(buf));
+		buf[63] = '@';
+		send(cfd, buf, sizeof(buf), MSG_OOB);
+
+		wait_for_signal(pipefd[0]);
+
+		/* Test 2: Test for OOB being overwitten */
+		memset(buf, 'x', sizeof(buf));
+		buf[63] = '%';
+		send(cfd, buf, sizeof(buf), MSG_OOB);
+
+		memset(buf, 'x', sizeof(buf));
+		buf[63] = '#';
+		send(cfd, buf, sizeof(buf), MSG_OOB);
+
+		wait_for_signal(pipefd[0]);
+
+		/* Test 3: Test for SIOCATMARK */
+		memset(buf, 'x', sizeof(buf));
+		buf[63] = '@';
+		send(cfd, buf, sizeof(buf), MSG_OOB);
+
+		memset(buf, 'x', sizeof(buf));
+		buf[63] = '%';
+		send(cfd, buf, sizeof(buf), MSG_OOB);
+
+		memset(buf, 'x', sizeof(buf));
+		send(cfd, buf, sizeof(buf), 0);
+
+		wait_for_signal(pipefd[0]);
+
+		/* Test 4: Test for 1byte OOB msg */
+		memset(buf, 'x', sizeof(buf));
+		buf[0] = '@';
+		send(cfd, buf, 1, MSG_OOB);
+	}
+}
+
+int
+main(int argc, char **argv)
+{
+	int lfd, pfd;
+	struct sockaddr_un consumer_addr, paddr;
+	socklen_t len = sizeof(consumer_addr);
+	char buf[1024];
+	int on = 0;
+	char oob;
+	int flags;
+	int atmark;
+	char *tmp_file;
+
+	lfd = socket(AF_UNIX, SOCK_STREAM, 0);
+	memset(&consumer_addr, 0, sizeof(consumer_addr));
+	consumer_addr.sun_family = AF_UNIX;
+	sprintf(sock_name, "unix_oob_%d", getpid());
+	unlink(sock_name);
+	strcpy(consumer_addr.sun_path, sock_name);
+
+	if ((bind(lfd, (struct sockaddr *)&consumer_addr,
+		  sizeof(consumer_addr))) != 0) {
+		perror("socket bind failed");
+		exit(1);
+	}
+
+	pipe(pipefd);
+
+	listen(lfd, 1);
+
+	producer_id = fork();
+	if (producer_id == 0) {
+		producer(&consumer_addr);
+		exit(0);
+	}
+
+	set_sig_handler(SIGURG);
+	signal_producer(pipefd[1]);
+
+	pfd = accept(lfd, (struct sockaddr *) &paddr, &len);
+	fcntl(pfd, F_SETOWN, getpid());
+
+	signal_recvd = 0;
+	signal_producer(pipefd[1]);
+
+	/* Test 1:
+	 * veriyf that SIGURG is
+	 * delivered and 63 bytes are
+	 * read and oob is '@'
+	 */
+	wait_for_data(pfd, POLLIN | POLLPRI);
+	read_oob(pfd, &oob);
+	len = read_data(pfd, buf, 1024);
+	if (!signal_recvd || len != 63 || oob != '@') {
+		fprintf(stderr, "Test 1 failed sigurg %d len %d %c\n",
+			 signal_recvd, len, oob);
+			die(1);
+	}
+
+	signal_recvd = 0;
+	signal_producer(pipefd[1]);
+
+	/* Test 2:
+	 * Verify that the first OOB is over written by
+	 * the 2nd one and the first OOB is returned as
+	 * part of the read, and sigurg is received.
+	 */
+	wait_for_data(pfd, POLLIN | POLLPRI);
+	len = 0;
+	while (len < 70)
+		len = recv(pfd, buf, 1024, MSG_PEEK);
+	len = read_data(pfd, buf, 1024);
+	read_oob(pfd, &oob);
+	if (!signal_recvd || len != 127 || oob != '#') {
+		fprintf(stderr, "Test 2 failed, sigurg %d len %d OOB %c\n",
+		signal_recvd, len, oob);
+		die(1);
+	}
+
+	signal_recvd = 0;
+	signal_producer(pipefd[1]);
+
+	/* Test 3:
+	 * verify that 2nd oob over writes
+	 * the first one and read breaks at
+	 * oob boundary returning 127 bytes
+	 * and sigurg is received and atmark
+	 * is set.
+	 * oob is '%' and second read returns
+	 * 64 bytes.
+	 */
+	len = 0;
+	wait_for_data(pfd, POLLIN | POLLPRI);
+	while (len < 150)
+		len = recv(pfd, buf, 1024, MSG_PEEK);
+	len = read_data(pfd, buf, 1024);
+	atmark = is_sioctatmark(pfd);
+	read_oob(pfd, &oob);
+
+	if (!signal_recvd || len != 127 || oob != '%' || atmark != 1) {
+		fprintf(stderr, "Test 3 failed, sigurg %d len %d OOB %c ",
+		"atmark %d\n", signal_recvd, len, oob, atmark);
+		die(1);
+	}
+
+	signal_recvd = 0;
+
+	len = read_data(pfd, buf, 1024);
+	if (len != 64) {
+		fprintf(stderr, "Test 3.1 failed, sigurg %d len %d OOB %c\n",
+			signal_recvd, len, oob);
+		die(1);
+	}
+
+	signal_recvd = 0;
+	signal_producer(pipefd[1]);
+
+	/* Test 4:
+	 * verify that a single byte
+	 * oob message is delivered.
+	 * set non blocking mode and
+	 * check proper error is
+	 * returned and sigurg is
+	 * received and correct
+	 * oob is read.
+	 */
+
+	set_filemode(pfd, 0);
+
+	wait_for_data(pfd, POLLIN | POLLPRI);
+	len = read_data(pfd, buf, 1024);
+	if ((len == -1) && (errno == 11))
+		len = 0;
+
+	read_oob(pfd, &oob);
+
+	if (!signal_recvd || len != 0 || oob != '@') {
+		fprintf(stderr, "Test 4 failed, sigurg %d len %d OOB %c\n",
+			 signal_recvd, len, oob);
+		die(1);
+	}
+
+	set_filemode(pfd, 1);
+
+	/* Inline Testing */
+
+	on = 1;
+	if (setsockopt(pfd, SOL_SOCKET, SO_OOBINLINE, &on, sizeof(on))) {
+		perror("SO_OOBINLINE");
+		die(1);
+	}
+
+	signal_recvd = 0;
+	signal_producer(pipefd[1]);
+
+	/* Test 1 -- Inline:
+	 * Check that SIGURG is
+	 * delivered and 63 bytes are
+	 * read and oob is '@'
+	 */
+
+	wait_for_data(pfd, POLLIN | POLLPRI);
+	len = read_data(pfd, buf, 1024);
+
+	if (!signal_recvd || len != 63) {
+		fprintf(stderr, "Test 1 Inline failed, sigurg %d len %d\n",
+			signal_recvd, len);
+		die(1);
+	}
+
+	len = read_data(pfd, buf, 1024);
+
+	if (len != 1) {
+		fprintf(stderr,
+			 "Test 1.1 Inline failed, sigurg %d len %d oob %c\n",
+			 signal_recvd, len, oob);
+		die(1);
+	}
+
+	signal_recvd = 0;
+	signal_producer(pipefd[1]);
+
+	/* Test 2 -- Inline:
+	 * Verify that the first OOB is over written by
+	 * the 2nd one and read breaks correctly on
+	 * 2nd OOB boundary with the first OOB returned as
+	 * part of the read, and sigurg is delivered and
+	 * siocatmark returns true.
+	 * next read returns one byte, the oob byte
+	 * and siocatmark returns false.
+	 */
+	len = 0;
+	wait_for_data(pfd, POLLIN | POLLPRI);
+	while (len < 70)
+		len = recv(pfd, buf, 1024, MSG_PEEK);
+	len = read_data(pfd, buf, 1024);
+	atmark = is_sioctatmark(pfd);
+	if (len != 127 || atmark != 1 || !signal_recvd) {
+		fprintf(stderr, "Test 2 Inline failed, len %d atmark %d\n",
+			 len, atmark);
+		die(1);
+	}
+
+	len = read_data(pfd, buf, 1024);
+	atmark = is_sioctatmark(pfd);
+	if (len != 1 || buf[0] != '#' || atmark == 1) {
+		fprintf(stderr, "Test 2.1 Inline failed, len %d data %c atmark %d\n",
+			len, buf[0], atmark);
+		die(1);
+	}
+
+	signal_recvd = 0;
+	signal_producer(pipefd[1]);
+
+	/* Test 3 -- Inline:
+	 * verify that 2nd oob over writes
+	 * the first one and read breaks at
+	 * oob boundary returning 127 bytes
+	 * and sigurg is received and siocatmark
+	 * is true after the read.
+	 * subsequent read returns 65 bytes
+	 * because of oob which should be '%'.
+	 */
+	len = 0;
+	wait_for_data(pfd, POLLIN | POLLPRI);
+	while (len < 126)
+		len = recv(pfd, buf, 1024, MSG_PEEK);
+	len = read_data(pfd, buf, 1024);
+	atmark = is_sioctatmark(pfd);
+	if (!signal_recvd || len != 127 || !atmark) {
+		fprintf(stderr,
+			 "Test 3 Inline failed, sigurg %d len %d data %c\n",
+			 signal_recvd, len, buf[0]);
+		die(1);
+	}
+
+	len = read_data(pfd, buf, 1024);
+	atmark = is_sioctatmark(pfd);
+	if (len != 65 || buf[0] != '%' || atmark != 0) {
+		fprintf(stderr,
+			 "Test 3.1 Inline failed, len %d oob %c atmark %d\n",
+			 len, buf[0], atmark);
+		die(1);
+	}
+
+	signal_recvd = 0;
+	signal_producer(pipefd[1]);
+
+	/* Test 4 -- Inline:
+	 * verify that a single
+	 * byte oob message is delivered
+	 * and read returns one byte, the oob
+	 * byte and sigurg is received
+	 */
+	wait_for_data(pfd, POLLIN | POLLPRI);
+	len = read_data(pfd, buf, 1024);
+	if (!signal_recvd || len != 1 || buf[0] != '@') {
+		fprintf(stderr,
+			"Test 4 Inline failed, signal %d len %d data %c\n",
+		signal_recvd, len, buf[0]);
+		die(1);
+	}
+	die(0);
+}
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 6f905b53904f..21b646d10b88 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -42,3 +42,4 @@ CONFIG_NET_CLS_FLOWER=m
 CONFIG_NET_ACT_TUNNEL_KEY=m
 CONFIG_NET_ACT_MIRRED=m
 CONFIG_BAREUDP=m
+CONFIG_IPV6_IOAM6_LWTUNNEL=y
diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh
index a8ad92850e63..13350cd5c8ac 100755
--- a/tools/testing/selftests/net/fcnal-test.sh
+++ b/tools/testing/selftests/net/fcnal-test.sh
@@ -37,6 +37,9 @@
 #
 # server / client nomenclature relative to ns-A
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 VERBOSE=0
 
 NSA_DEV=eth1
@@ -3879,6 +3882,32 @@ use_case_ping_lla_multi()
 	log_test_addr ${MCAST}%${NSC_DEV} $? 0 "Post cycle ${NSA} ${NSA_DEV2}, ping out ns-C"
 }
 
+# Perform IPv{4,6} SNAT on ns-A, and verify TCP connection is successfully
+# established with ns-B.
+use_case_snat_on_vrf()
+{
+	setup "yes"
+
+	local port="12345"
+
+	run_cmd iptables -t nat -A POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP} -o ${VRF}
+	run_cmd ip6tables -t nat -A POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP6} -o ${VRF}
+
+	run_cmd_nsb nettest -s -l ${NSB_IP} -p ${port} &
+	sleep 1
+	run_cmd nettest -d ${VRF} -r ${NSB_IP} -p ${port}
+	log_test $? 0 "IPv4 TCP connection over VRF with SNAT"
+
+	run_cmd_nsb nettest -6 -s -l ${NSB_IP6} -p ${port} &
+	sleep 1
+	run_cmd nettest -6 -d ${VRF} -r ${NSB_IP6} -p ${port}
+	log_test $? 0 "IPv6 TCP connection over VRF with SNAT"
+
+	# Cleanup
+	run_cmd iptables -t nat -D POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP} -o ${VRF}
+	run_cmd ip6tables -t nat -D POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP6} -o ${VRF}
+}
+
 use_cases()
 {
 	log_section "Use cases"
@@ -3886,6 +3915,8 @@ use_cases()
 	use_case_br
 	log_subsection "Ping LLA with multiple interfaces"
 	use_case_ping_lla_multi
+	log_subsection "SNAT on VRF"
+	use_case_snat_on_vrf
 }
 
 ################################################################################
@@ -3946,7 +3977,7 @@ fi
 which nettest >/dev/null
 if [ $? -ne 0 ]; then
 	echo "'nettest' command not found; skipping tests"
-	exit 0
+	exit $ksft_skip
 fi
 
 declare -i nfail=0
diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
index a93e6b690e06..43ea8407a82e 100755
--- a/tools/testing/selftests/net/fib_rule_tests.sh
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -3,6 +3,9 @@
 
 # This test is for checking IPv4 and IPv6 FIB rules API
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 ret=0
 
 PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
@@ -238,12 +241,12 @@ run_fibrule_tests()
 
 if [ "$(id -u)" -ne 0 ];then
 	echo "SKIP: Need root privileges"
-	exit 0
+	exit $ksft_skip
 fi
 
 if [ ! -x "$(command -v ip)" ]; then
 	echo "SKIP: Could not run test without ip tool"
-	exit 0
+	exit $ksft_skip
 fi
 
 # start clean
diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh
index 13d3d4428a32..2c14a86adaaa 100644
--- a/tools/testing/selftests/net/forwarding/devlink_lib.sh
+++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh
@@ -1,6 +1,9 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 ##############################################################################
 # Defines
 
@@ -9,11 +12,11 @@ if [[ ! -v DEVLINK_DEV ]]; then
 			     | jq -r '.port | keys[]' | cut -d/ -f-2)
 	if [ -z "$DEVLINK_DEV" ]; then
 		echo "SKIP: ${NETIFS[p1]} has no devlink device registered for it"
-		exit 1
+		exit $ksft_skip
 	fi
 	if [[ "$(echo $DEVLINK_DEV | grep -c pci)" -eq 0 ]]; then
 		echo "SKIP: devlink device's bus is not PCI"
-		exit 1
+		exit $ksft_skip
 	fi
 
 	DEVLINK_VIDDID=$(lspci -s $(echo $DEVLINK_DEV | cut -d"/" -f2) \
@@ -22,7 +25,7 @@ elif [[ ! -z "$DEVLINK_DEV" ]]; then
 	devlink dev show $DEVLINK_DEV &> /dev/null
 	if [ $? -ne 0 ]; then
 		echo "SKIP: devlink device \"$DEVLINK_DEV\" not found"
-		exit 1
+		exit $ksft_skip
 	fi
 fi
 
@@ -32,19 +35,19 @@ fi
 devlink help 2>&1 | grep resource &> /dev/null
 if [ $? -ne 0 ]; then
 	echo "SKIP: iproute2 too old, missing devlink resource support"
-	exit 1
+	exit $ksft_skip
 fi
 
 devlink help 2>&1 | grep trap &> /dev/null
 if [ $? -ne 0 ]; then
 	echo "SKIP: iproute2 too old, missing devlink trap support"
-	exit 1
+	exit $ksft_skip
 fi
 
 devlink dev help 2>&1 | grep info &> /dev/null
 if [ $? -ne 0 ]; then
 	echo "SKIP: iproute2 too old, missing devlink dev info support"
-	exit 1
+	exit $ksft_skip
 fi
 
 ##############################################################################
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 42e28c983d41..e7fc5c35b569 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -4,6 +4,9 @@
 ##############################################################################
 # Defines
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 # Can be overridden by the configuration file.
 PING=${PING:=ping}
 PING6=${PING6:=ping6}
@@ -38,7 +41,7 @@ check_tc_version()
 	tc -j &> /dev/null
 	if [[ $? -ne 0 ]]; then
 		echo "SKIP: iproute2 too old; tc is missing JSON support"
-		exit 1
+		exit $ksft_skip
 	fi
 }
 
@@ -51,7 +54,7 @@ check_tc_mpls_support()
 		matchall action pipe &> /dev/null
 	if [[ $? -ne 0 ]]; then
 		echo "SKIP: iproute2 too old; tc is missing MPLS support"
-		return 1
+		return $ksft_skip
 	fi
 	tc filter del dev $dev ingress protocol mpls_uc pref 1 handle 1 \
 		matchall
@@ -69,7 +72,7 @@ check_tc_mpls_lse_stats()
 
 	if [[ $? -ne 0 ]]; then
 		echo "SKIP: iproute2 too old; tc-flower is missing extended MPLS support"
-		return 1
+		return $ksft_skip
 	fi
 
 	tc -j filter show dev $dev ingress protocol mpls_uc | jq . &> /dev/null
@@ -79,7 +82,7 @@ check_tc_mpls_lse_stats()
 
 	if [[ $ret -ne 0 ]]; then
 		echo "SKIP: iproute2 too old; tc-flower produces invalid json output for extended MPLS filters"
-		return 1
+		return $ksft_skip
 	fi
 }
 
@@ -88,7 +91,7 @@ check_tc_shblock_support()
 	tc filter help 2>&1 | grep block &> /dev/null
 	if [[ $? -ne 0 ]]; then
 		echo "SKIP: iproute2 too old; tc is missing shared block support"
-		exit 1
+		exit $ksft_skip
 	fi
 }
 
@@ -97,7 +100,7 @@ check_tc_chain_support()
 	tc help 2>&1|grep chain &> /dev/null
 	if [[ $? -ne 0 ]]; then
 		echo "SKIP: iproute2 too old; tc is missing chain support"
-		exit 1
+		exit $ksft_skip
 	fi
 }
 
@@ -106,7 +109,7 @@ check_tc_action_hw_stats_support()
 	tc actions help 2>&1 | grep -q hw_stats
 	if [[ $? -ne 0 ]]; then
 		echo "SKIP: iproute2 too old; tc is missing action hw_stats support"
-		exit 1
+		exit $ksft_skip
 	fi
 }
 
@@ -115,13 +118,13 @@ check_ethtool_lanes_support()
 	ethtool --help 2>&1| grep lanes &> /dev/null
 	if [[ $? -ne 0 ]]; then
 		echo "SKIP: ethtool too old; it is missing lanes support"
-		exit 1
+		exit $ksft_skip
 	fi
 }
 
 if [[ "$(id -u)" -ne 0 ]]; then
 	echo "SKIP: need root privileges"
-	exit 0
+	exit $ksft_skip
 fi
 
 if [[ "$CHECK_TC" = "yes" ]]; then
@@ -134,7 +137,7 @@ require_command()
 
 	if [[ ! -x "$(command -v "$cmd")" ]]; then
 		echo "SKIP: $cmd not installed"
-		exit 1
+		exit $ksft_skip
 	fi
 }
 
@@ -143,7 +146,7 @@ require_command $MZ
 
 if [[ ! -v NUM_NETIFS ]]; then
 	echo "SKIP: importer does not define \"NUM_NETIFS\""
-	exit 1
+	exit $ksft_skip
 fi
 
 ##############################################################################
@@ -203,7 +206,7 @@ for ((i = 1; i <= NUM_NETIFS; ++i)); do
 	ip link show dev ${NETIFS[p$i]} &> /dev/null
 	if [[ $? -ne 0 ]]; then
 		echo "SKIP: could not find all required interfaces"
-		exit 1
+		exit $ksft_skip
 	fi
 done
 
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
index 76efb1f8375e..a0d612e04990 100755
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
@@ -411,7 +411,7 @@ ping_ipv6()
 ip nexthop ls >/dev/null 2>&1
 if [ $? -ne 0 ]; then
 	echo "Nexthop objects not supported; skipping tests"
-	exit 0
+	exit $ksft_skip
 fi
 
 trap cleanup EXIT
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
index 4898dd4118f1..cb08ffe2356a 100755
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
@@ -386,7 +386,7 @@ ping_ipv6()
 ip nexthop ls >/dev/null 2>&1
 if [ $? -ne 0 ]; then
 	echo "Nexthop objects not supported; skipping tests"
-	exit 0
+	exit $ksft_skip
 fi
 
 trap cleanup EXIT
diff --git a/tools/testing/selftests/net/gre_gso.sh b/tools/testing/selftests/net/gre_gso.sh
new file mode 100755
index 000000000000..facbb0c80443
--- /dev/null
+++ b/tools/testing/selftests/net/gre_gso.sh
@@ -0,0 +1,236 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is for checking GRE GSO.
+
+ret=0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# all tests in this script. Can be overridden with -t option
+TESTS="gre_gso"
+
+VERBOSE=0
+PAUSE_ON_FAIL=no
+PAUSE=no
+IP="ip -netns ns1"
+NS_EXEC="ip netns exec ns1"
+TMPFILE=`mktemp`
+PID=
+
+log_test()
+{
+	local rc=$1
+	local expected=$2
+	local msg="$3"
+
+	if [ ${rc} -eq ${expected} ]; then
+		printf "    TEST: %-60s  [ OK ]\n" "${msg}"
+		nsuccess=$((nsuccess+1))
+	else
+		ret=1
+		nfail=$((nfail+1))
+		printf "    TEST: %-60s  [FAIL]\n" "${msg}"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+		echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	fi
+
+	if [ "${PAUSE}" = "yes" ]; then
+		echo
+		echo "hit enter to continue, 'q' to quit"
+		read a
+		[ "$a" = "q" ] && exit 1
+	fi
+}
+
+setup()
+{
+	set -e
+	ip netns add ns1
+	ip netns set ns1 auto
+	$IP link set dev lo up
+
+	ip link add veth0 type veth peer name veth1
+	ip link set veth0 up
+	ip link set veth1 netns ns1
+	$IP link set veth1 name veth0
+	$IP link set veth0 up
+
+	dd if=/dev/urandom of=$TMPFILE bs=1024 count=2048 &>/dev/null
+	set +e
+}
+
+cleanup()
+{
+	rm -rf $TMPFILE
+	[ -n "$PID" ] && kill $PID
+	ip link del dev gre1 &> /dev/null
+	ip link del dev veth0 &> /dev/null
+	ip netns del ns1
+}
+
+get_linklocal()
+{
+	local dev=$1
+	local ns=$2
+	local addr
+
+	[ -n "$ns" ] && ns="-netns $ns"
+
+	addr=$(ip -6 -br $ns addr show dev ${dev} | \
+	awk '{
+		for (i = 3; i <= NF; ++i) {
+			if ($i ~ /^fe80/)
+				print $i
+		}
+	}'
+	)
+	addr=${addr/\/*}
+
+	[ -z "$addr" ] && return 1
+
+	echo $addr
+
+	return 0
+}
+
+gre_create_tun()
+{
+	local a1=$1
+	local a2=$2
+	local mode
+
+	[[ $a1 =~ ^[0-9.]*$ ]] && mode=gre || mode=ip6gre
+
+	ip tunnel add gre1 mode $mode local $a1 remote $a2 dev veth0
+	ip link set gre1 up
+	$IP tunnel add gre1 mode $mode local $a2 remote $a1 dev veth0
+	$IP link set gre1 up
+}
+
+gre_gst_test_checks()
+{
+	local name=$1
+	local addr=$2
+
+	$NS_EXEC nc -kl $port >/dev/null &
+	PID=$!
+	while ! $NS_EXEC ss -ltn | grep -q $port; do ((i++)); sleep 0.01; done
+
+	cat $TMPFILE | timeout 1 nc $addr $port
+	log_test $? 0 "$name - copy file w/ TSO"
+
+	ethtool -K veth0 tso off
+
+	cat $TMPFILE | timeout 1 nc $addr $port
+	log_test $? 0 "$name - copy file w/ GSO"
+
+	ethtool -K veth0 tso on
+
+	kill $PID
+	PID=
+}
+
+gre6_gso_test()
+{
+	local port=7777
+
+	setup
+
+	a1=$(get_linklocal veth0)
+	a2=$(get_linklocal veth0 ns1)
+
+	gre_create_tun $a1 $a2
+
+	ip  addr add 172.16.2.1/24 dev gre1
+	$IP addr add 172.16.2.2/24 dev gre1
+
+	ip  -6 addr add 2001:db8:1::1/64 dev gre1 nodad
+	$IP -6 addr add 2001:db8:1::2/64 dev gre1 nodad
+
+	sleep 2
+
+	gre_gst_test_checks GREv6/v4 172.16.2.2
+	gre_gst_test_checks GREv6/v6 2001:db8:1::2
+
+	cleanup
+}
+
+gre_gso_test()
+{
+	gre6_gso_test
+}
+
+################################################################################
+# usage
+
+usage()
+{
+	cat <<EOF
+usage: ${0##*/} OPTS
+
+        -t <test>   Test(s) to run (default: all)
+                    (options: $TESTS)
+        -p          Pause on fail
+        -P          Pause after each test before cleanup
+        -v          verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
+# main
+
+while getopts :t:pPhv o
+do
+	case $o in
+		t) TESTS=$OPTARG;;
+		p) PAUSE_ON_FAIL=yes;;
+		P) PAUSE=yes;;
+		v) VERBOSE=$(($VERBOSE + 1));;
+		h) usage; exit 0;;
+		*) usage; exit 1;;
+	esac
+done
+
+PEER_CMD="ip netns exec ${PEER_NS}"
+
+# make sure we don't pause twice
+[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no
+
+if [ "$(id -u)" -ne 0 ];then
+	echo "SKIP: Need root privileges"
+	exit $ksft_skip;
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v nc)" ]; then
+	echo "SKIP: Could not run test without nc tool"
+	exit $ksft_skip
+fi
+
+# start clean
+cleanup &> /dev/null
+
+for t in $TESTS
+do
+	case $t in
+	gre_gso)		gre_gso_test;;
+
+	help) echo "Test names: $TESTS"; exit 0;;
+	esac
+done
+
+if [ "$TESTS" != "none" ]; then
+	printf "\nTests passed: %3d\n" ${nsuccess}
+	printf "Tests failed: %3d\n"   ${nfail}
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/gro.c
new file mode 100644
index 000000000000..cf37ce86b0fd
--- /dev/null
+++ b/tools/testing/selftests/net/gro.c
@@ -0,0 +1,1095 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This testsuite provides conformance testing for GRO coalescing.
+ *
+ * Test cases:
+ * 1.data
+ *  Data packets of the same size and same header setup with correct
+ *  sequence numbers coalesce. The one exception being the last data
+ *  packet coalesced: it can be smaller than the rest and coalesced
+ *  as long as it is in the same flow.
+ * 2.ack
+ *  Pure ACK does not coalesce.
+ * 3.flags
+ *  Specific test cases: no packets with PSH, SYN, URG, RST set will
+ *  be coalesced.
+ * 4.tcp
+ *  Packets with incorrect checksum, non-consecutive seqno and
+ *  different TCP header options shouldn't coalesce. Nit: given that
+ *  some extension headers have paddings, such as timestamp, headers
+ *  that are padding differently would not be coalesced.
+ * 5.ip:
+ *  Packets with different (ECN, TTL, TOS) header, ip options or
+ *  ip fragments (ipv6) shouldn't coalesce.
+ * 6.large:
+ *  Packets larger than GRO_MAX_SIZE packets shouldn't coalesce.
+ *
+ * MSS is defined as 4096 - header because if it is too small
+ * (i.e. 1500 MTU - header), it will result in many packets,
+ * increasing the "large" test case's flakiness. This is because
+ * due to time sensitivity in the coalescing window, the receiver
+ * may not coalesce all of the packets.
+ *
+ * Note the timing issue applies to all of the test cases, so some
+ * flakiness is to be expected.
+ *
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <getopt.h>
+#include <linux/filter.h>
+#include <linux/if_packet.h>
+#include <linux/ipv6.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <unistd.h>
+
+#define DPORT 8000
+#define SPORT 1500
+#define PAYLOAD_LEN 100
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#define NUM_PACKETS 4
+#define START_SEQ 100
+#define START_ACK 100
+#define SIP6 "fdaa::2"
+#define DIP6 "fdaa::1"
+#define SIP4 "192.168.1.200"
+#define DIP4 "192.168.1.100"
+#define ETH_P_NONE 0
+#define TOTAL_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
+#define MSS (4096 - sizeof(struct tcphdr) - sizeof(struct ipv6hdr))
+#define MAX_PAYLOAD (IP_MAXPACKET - sizeof(struct tcphdr) - sizeof(struct ipv6hdr))
+#define NUM_LARGE_PKT (MAX_PAYLOAD / MSS)
+#define MAX_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
+
+static int proto = -1;
+static uint8_t src_mac[ETH_ALEN], dst_mac[ETH_ALEN];
+static char *testname = "data";
+static char *ifname = "eth0";
+static char *smac = "aa:00:00:00:00:02";
+static char *dmac = "aa:00:00:00:00:01";
+static bool verbose;
+static bool tx_socket = true;
+static int tcp_offset = -1;
+static int total_hdr_len = -1;
+static int ethhdr_proto = -1;
+
+static void vlog(const char *fmt, ...)
+{
+	va_list args;
+
+	if (verbose) {
+		va_start(args, fmt);
+		vfprintf(stderr, fmt, args);
+		va_end(args);
+	}
+}
+
+static void setup_sock_filter(int fd)
+{
+	const int dport_off = tcp_offset + offsetof(struct tcphdr, dest);
+	const int ethproto_off = offsetof(struct ethhdr, h_proto);
+	int optlen = 0;
+	int ipproto_off;
+	int next_off;
+
+	if (proto == PF_INET)
+		next_off = offsetof(struct iphdr, protocol);
+	else
+		next_off = offsetof(struct ipv6hdr, nexthdr);
+	ipproto_off = ETH_HLEN + next_off;
+
+	if (strcmp(testname, "ip") == 0) {
+		if (proto == PF_INET)
+			optlen = sizeof(struct ip_timestamp);
+		else
+			optlen = sizeof(struct ip6_frag);
+	}
+
+	struct sock_filter filter[] = {
+			BPF_STMT(BPF_LD  + BPF_H   + BPF_ABS, ethproto_off),
+			BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ntohs(ethhdr_proto), 0, 7),
+			BPF_STMT(BPF_LD  + BPF_B   + BPF_ABS, ipproto_off),
+			BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 0, 5),
+			BPF_STMT(BPF_LD  + BPF_H   + BPF_ABS, dport_off),
+			BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 2, 0),
+			BPF_STMT(BPF_LD  + BPF_H   + BPF_ABS, dport_off + optlen),
+			BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 0, 1),
+			BPF_STMT(BPF_RET + BPF_K, 0xFFFFFFFF),
+			BPF_STMT(BPF_RET + BPF_K, 0),
+	};
+
+	struct sock_fprog bpf = {
+		.len = ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+
+	if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf, sizeof(bpf)) < 0)
+		error(1, errno, "error setting filter");
+}
+
+static uint32_t checksum_nofold(void *data, size_t len, uint32_t sum)
+{
+	uint16_t *words = data;
+	int i;
+
+	for (i = 0; i < len / 2; i++)
+		sum += words[i];
+	if (len & 1)
+		sum += ((char *)data)[len - 1];
+	return sum;
+}
+
+static uint16_t checksum_fold(void *data, size_t len, uint32_t sum)
+{
+	sum = checksum_nofold(data, len, sum);
+	while (sum > 0xFFFF)
+		sum = (sum & 0xFFFF) + (sum >> 16);
+	return ~sum;
+}
+
+static uint16_t tcp_checksum(void *buf, int payload_len)
+{
+	struct pseudo_header6 {
+		struct in6_addr saddr;
+		struct in6_addr daddr;
+		uint16_t protocol;
+		uint16_t payload_len;
+	} ph6;
+	struct pseudo_header4 {
+		struct in_addr saddr;
+		struct in_addr daddr;
+		uint16_t protocol;
+		uint16_t payload_len;
+	} ph4;
+	uint32_t sum = 0;
+
+	if (proto == PF_INET6) {
+		if (inet_pton(AF_INET6, SIP6, &ph6.saddr) != 1)
+			error(1, errno, "inet_pton6 source ip pseudo");
+		if (inet_pton(AF_INET6, DIP6, &ph6.daddr) != 1)
+			error(1, errno, "inet_pton6 dest ip pseudo");
+		ph6.protocol = htons(IPPROTO_TCP);
+		ph6.payload_len = htons(sizeof(struct tcphdr) + payload_len);
+
+		sum = checksum_nofold(&ph6, sizeof(ph6), 0);
+	} else if (proto == PF_INET) {
+		if (inet_pton(AF_INET, SIP4, &ph4.saddr) != 1)
+			error(1, errno, "inet_pton source ip pseudo");
+		if (inet_pton(AF_INET, DIP4, &ph4.daddr) != 1)
+			error(1, errno, "inet_pton dest ip pseudo");
+		ph4.protocol = htons(IPPROTO_TCP);
+		ph4.payload_len = htons(sizeof(struct tcphdr) + payload_len);
+
+		sum = checksum_nofold(&ph4, sizeof(ph4), 0);
+	}
+
+	return checksum_fold(buf, sizeof(struct tcphdr) + payload_len, sum);
+}
+
+static void read_MAC(uint8_t *mac_addr, char *mac)
+{
+	if (sscanf(mac, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
+		   &mac_addr[0], &mac_addr[1], &mac_addr[2],
+		   &mac_addr[3], &mac_addr[4], &mac_addr[5]) != 6)
+		error(1, 0, "sscanf");
+}
+
+static void fill_datalinklayer(void *buf)
+{
+	struct ethhdr *eth = buf;
+
+	memcpy(eth->h_dest, dst_mac, ETH_ALEN);
+	memcpy(eth->h_source, src_mac, ETH_ALEN);
+	eth->h_proto = ethhdr_proto;
+}
+
+static void fill_networklayer(void *buf, int payload_len)
+{
+	struct ipv6hdr *ip6h = buf;
+	struct iphdr *iph = buf;
+
+	if (proto == PF_INET6) {
+		memset(ip6h, 0, sizeof(*ip6h));
+
+		ip6h->version = 6;
+		ip6h->payload_len = htons(sizeof(struct tcphdr) + payload_len);
+		ip6h->nexthdr = IPPROTO_TCP;
+		ip6h->hop_limit = 8;
+		if (inet_pton(AF_INET6, SIP6, &ip6h->saddr) != 1)
+			error(1, errno, "inet_pton source ip6");
+		if (inet_pton(AF_INET6, DIP6, &ip6h->daddr) != 1)
+			error(1, errno, "inet_pton dest ip6");
+	} else if (proto == PF_INET) {
+		memset(iph, 0, sizeof(*iph));
+
+		iph->version = 4;
+		iph->ihl = 5;
+		iph->ttl = 8;
+		iph->protocol	= IPPROTO_TCP;
+		iph->tot_len = htons(sizeof(struct tcphdr) +
+				payload_len + sizeof(struct iphdr));
+		iph->frag_off = htons(0x4000); /* DF = 1, MF = 0 */
+		if (inet_pton(AF_INET, SIP4, &iph->saddr) != 1)
+			error(1, errno, "inet_pton source ip");
+		if (inet_pton(AF_INET, DIP4, &iph->daddr) != 1)
+			error(1, errno, "inet_pton dest ip");
+		iph->check = checksum_fold(buf, sizeof(struct iphdr), 0);
+	}
+}
+
+static void fill_transportlayer(void *buf, int seq_offset, int ack_offset,
+				int payload_len, int fin)
+{
+	struct tcphdr *tcph = buf;
+
+	memset(tcph, 0, sizeof(*tcph));
+
+	tcph->source = htons(SPORT);
+	tcph->dest = htons(DPORT);
+	tcph->seq = ntohl(START_SEQ + seq_offset);
+	tcph->ack_seq = ntohl(START_ACK + ack_offset);
+	tcph->ack = 1;
+	tcph->fin = fin;
+	tcph->doff = 5;
+	tcph->window = htons(TCP_MAXWIN);
+	tcph->urg_ptr = 0;
+	tcph->check = tcp_checksum(tcph, payload_len);
+}
+
+static void write_packet(int fd, char *buf, int len, struct sockaddr_ll *daddr)
+{
+	int ret = -1;
+
+	ret = sendto(fd, buf, len, 0, (struct sockaddr *)daddr, sizeof(*daddr));
+	if (ret == -1)
+		error(1, errno, "sendto failure");
+	if (ret != len)
+		error(1, errno, "sendto wrong length");
+}
+
+static void create_packet(void *buf, int seq_offset, int ack_offset,
+			  int payload_len, int fin)
+{
+	memset(buf, 0, total_hdr_len);
+	memset(buf + total_hdr_len, 'a', payload_len);
+	fill_transportlayer(buf + tcp_offset, seq_offset, ack_offset,
+			    payload_len, fin);
+	fill_networklayer(buf + ETH_HLEN, payload_len);
+	fill_datalinklayer(buf);
+}
+
+/* send one extra flag, not first and not last pkt */
+static void send_flags(int fd, struct sockaddr_ll *daddr, int psh, int syn,
+		       int rst, int urg)
+{
+	static char flag_buf[MAX_HDR_LEN + PAYLOAD_LEN];
+	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+	int payload_len, pkt_size, flag, i;
+	struct tcphdr *tcph;
+
+	payload_len = PAYLOAD_LEN * psh;
+	pkt_size = total_hdr_len + payload_len;
+	flag = NUM_PACKETS / 2;
+
+	create_packet(flag_buf, flag * payload_len, 0, payload_len, 0);
+
+	tcph = (struct tcphdr *)(flag_buf + tcp_offset);
+	tcph->psh = psh;
+	tcph->syn = syn;
+	tcph->rst = rst;
+	tcph->urg = urg;
+	tcph->check = 0;
+	tcph->check = tcp_checksum(tcph, payload_len);
+
+	for (i = 0; i < NUM_PACKETS + 1; i++) {
+		if (i == flag) {
+			write_packet(fd, flag_buf, pkt_size, daddr);
+			continue;
+		}
+		create_packet(buf, i * PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+		write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
+	}
+}
+
+/* Test for data of same length, smaller than previous
+ * and of different lengths
+ */
+static void send_data_pkts(int fd, struct sockaddr_ll *daddr,
+			   int payload_len1, int payload_len2)
+{
+	static char buf[ETH_HLEN + IP_MAXPACKET];
+
+	create_packet(buf, 0, 0, payload_len1, 0);
+	write_packet(fd, buf, total_hdr_len + payload_len1, daddr);
+	create_packet(buf, payload_len1, 0, payload_len2, 0);
+	write_packet(fd, buf, total_hdr_len + payload_len2, daddr);
+}
+
+/* If incoming segments make tracked segment length exceed
+ * legal IP datagram length, do not coalesce
+ */
+static void send_large(int fd, struct sockaddr_ll *daddr, int remainder)
+{
+	static char pkts[NUM_LARGE_PKT][TOTAL_HDR_LEN + MSS];
+	static char last[TOTAL_HDR_LEN + MSS];
+	static char new_seg[TOTAL_HDR_LEN + MSS];
+	int i;
+
+	for (i = 0; i < NUM_LARGE_PKT; i++)
+		create_packet(pkts[i], i * MSS, 0, MSS, 0);
+	create_packet(last, NUM_LARGE_PKT * MSS, 0, remainder, 0);
+	create_packet(new_seg, (NUM_LARGE_PKT + 1) * MSS, 0, remainder, 0);
+
+	for (i = 0; i < NUM_LARGE_PKT; i++)
+		write_packet(fd, pkts[i], total_hdr_len + MSS, daddr);
+	write_packet(fd, last, total_hdr_len + remainder, daddr);
+	write_packet(fd, new_seg, total_hdr_len + remainder, daddr);
+}
+
+/* Pure acks and dup acks don't coalesce */
+static void send_ack(int fd, struct sockaddr_ll *daddr)
+{
+	static char buf[MAX_HDR_LEN];
+
+	create_packet(buf, 0, 0, 0, 0);
+	write_packet(fd, buf, total_hdr_len, daddr);
+	write_packet(fd, buf, total_hdr_len, daddr);
+	create_packet(buf, 0, 1, 0, 0);
+	write_packet(fd, buf, total_hdr_len, daddr);
+}
+
+static void recompute_packet(char *buf, char *no_ext, int extlen)
+{
+	struct tcphdr *tcphdr = (struct tcphdr *)(buf + tcp_offset);
+	struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
+	struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+
+	memmove(buf, no_ext, total_hdr_len);
+	memmove(buf + total_hdr_len + extlen,
+		no_ext + total_hdr_len, PAYLOAD_LEN);
+
+	tcphdr->doff = tcphdr->doff + (extlen / 4);
+	tcphdr->check = 0;
+	tcphdr->check = tcp_checksum(tcphdr, PAYLOAD_LEN + extlen);
+	if (proto == PF_INET) {
+		iph->tot_len = htons(ntohs(iph->tot_len) + extlen);
+		iph->check = 0;
+		iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+	} else {
+		ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen);
+	}
+}
+
+static void tcp_write_options(char *buf, int kind, int ts)
+{
+	struct tcp_option_ts {
+		uint8_t kind;
+		uint8_t len;
+		uint32_t tsval;
+		uint32_t tsecr;
+	} *opt_ts = (void *)buf;
+	struct tcp_option_window {
+		uint8_t kind;
+		uint8_t len;
+		uint8_t shift;
+	} *opt_window = (void *)buf;
+
+	switch (kind) {
+	case TCPOPT_NOP:
+		buf[0] = TCPOPT_NOP;
+		break;
+	case TCPOPT_WINDOW:
+		memset(opt_window, 0, sizeof(struct tcp_option_window));
+		opt_window->kind = TCPOPT_WINDOW;
+		opt_window->len = TCPOLEN_WINDOW;
+		opt_window->shift = 0;
+		break;
+	case TCPOPT_TIMESTAMP:
+		memset(opt_ts, 0, sizeof(struct tcp_option_ts));
+		opt_ts->kind = TCPOPT_TIMESTAMP;
+		opt_ts->len = TCPOLEN_TIMESTAMP;
+		opt_ts->tsval = ts;
+		opt_ts->tsecr = 0;
+		break;
+	default:
+		error(1, 0, "unimplemented TCP option");
+		break;
+	}
+}
+
+/* TCP with options is always a permutation of {TS, NOP, NOP}.
+ * Implement different orders to verify coalescing stops.
+ */
+static void add_standard_tcp_options(char *buf, char *no_ext, int ts, int order)
+{
+	switch (order) {
+	case 0:
+		tcp_write_options(buf + total_hdr_len, TCPOPT_NOP, 0);
+		tcp_write_options(buf + total_hdr_len + 1, TCPOPT_NOP, 0);
+		tcp_write_options(buf + total_hdr_len + 2 /* two NOP opts */,
+				  TCPOPT_TIMESTAMP, ts);
+		break;
+	case 1:
+		tcp_write_options(buf + total_hdr_len, TCPOPT_NOP, 0);
+		tcp_write_options(buf + total_hdr_len + 1,
+				  TCPOPT_TIMESTAMP, ts);
+		tcp_write_options(buf + total_hdr_len + 1 + TCPOLEN_TIMESTAMP,
+				  TCPOPT_NOP, 0);
+		break;
+	case 2:
+		tcp_write_options(buf + total_hdr_len, TCPOPT_TIMESTAMP, ts);
+		tcp_write_options(buf + total_hdr_len + TCPOLEN_TIMESTAMP + 1,
+				  TCPOPT_NOP, 0);
+		tcp_write_options(buf + total_hdr_len + TCPOLEN_TIMESTAMP + 2,
+				  TCPOPT_NOP, 0);
+		break;
+	default:
+		error(1, 0, "unknown order");
+		break;
+	}
+	recompute_packet(buf, no_ext, TCPOLEN_TSTAMP_APPA);
+}
+
+/* Packets with invalid checksum don't coalesce. */
+static void send_changed_checksum(int fd, struct sockaddr_ll *daddr)
+{
+	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+	struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset);
+	int pkt_size = total_hdr_len + PAYLOAD_LEN;
+
+	create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+	write_packet(fd, buf, pkt_size, daddr);
+
+	create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+	tcph->check = tcph->check - 1;
+	write_packet(fd, buf, pkt_size, daddr);
+}
+
+ /* Packets with non-consecutive sequence number don't coalesce.*/
+static void send_changed_seq(int fd, struct sockaddr_ll *daddr)
+{
+	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+	struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset);
+	int pkt_size = total_hdr_len + PAYLOAD_LEN;
+
+	create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+	write_packet(fd, buf, pkt_size, daddr);
+
+	create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+	tcph->seq = ntohl(htonl(tcph->seq) + 1);
+	tcph->check = 0;
+	tcph->check = tcp_checksum(tcph, PAYLOAD_LEN);
+	write_packet(fd, buf, pkt_size, daddr);
+}
+
+ /* Packet with different timestamp option or different timestamps
+  * don't coalesce.
+  */
+static void send_changed_ts(int fd, struct sockaddr_ll *daddr)
+{
+	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+	static char extpkt[sizeof(buf) + TCPOLEN_TSTAMP_APPA];
+	int pkt_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA;
+
+	create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+	add_standard_tcp_options(extpkt, buf, 0, 0);
+	write_packet(fd, extpkt, pkt_size, daddr);
+
+	create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+	add_standard_tcp_options(extpkt, buf, 0, 0);
+	write_packet(fd, extpkt, pkt_size, daddr);
+
+	create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+	add_standard_tcp_options(extpkt, buf, 100, 0);
+	write_packet(fd, extpkt, pkt_size, daddr);
+
+	create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0);
+	add_standard_tcp_options(extpkt, buf, 100, 1);
+	write_packet(fd, extpkt, pkt_size, daddr);
+
+	create_packet(buf, PAYLOAD_LEN * 4, 0, PAYLOAD_LEN, 0);
+	add_standard_tcp_options(extpkt, buf, 100, 2);
+	write_packet(fd, extpkt, pkt_size, daddr);
+}
+
+/* Packet with different tcp options don't coalesce. */
+static void send_diff_opt(int fd, struct sockaddr_ll *daddr)
+{
+	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+	static char extpkt1[sizeof(buf) + TCPOLEN_TSTAMP_APPA];
+	static char extpkt2[sizeof(buf) + TCPOLEN_MAXSEG];
+	int extpkt1_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA;
+	int extpkt2_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_MAXSEG;
+
+	create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+	add_standard_tcp_options(extpkt1, buf, 0, 0);
+	write_packet(fd, extpkt1, extpkt1_size, daddr);
+
+	create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+	add_standard_tcp_options(extpkt1, buf, 0, 0);
+	write_packet(fd, extpkt1, extpkt1_size, daddr);
+
+	create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+	tcp_write_options(extpkt2 + MAX_HDR_LEN, TCPOPT_NOP, 0);
+	tcp_write_options(extpkt2 + MAX_HDR_LEN + 1, TCPOPT_WINDOW, 0);
+	recompute_packet(extpkt2, buf, TCPOLEN_WINDOW + 1);
+	write_packet(fd, extpkt2, extpkt2_size, daddr);
+}
+
+static void add_ipv4_ts_option(void *buf, void *optpkt)
+{
+	struct ip_timestamp *ts = (struct ip_timestamp *)(optpkt + tcp_offset);
+	int optlen = sizeof(struct ip_timestamp);
+	struct iphdr *iph;
+
+	if (optlen % 4)
+		error(1, 0, "ipv4 timestamp length is not a multiple of 4B");
+
+	ts->ipt_code = IPOPT_TS;
+	ts->ipt_len = optlen;
+	ts->ipt_ptr = 5;
+	ts->ipt_flg = IPOPT_TS_TSONLY;
+
+	memcpy(optpkt, buf, tcp_offset);
+	memcpy(optpkt + tcp_offset + optlen, buf + tcp_offset,
+	       sizeof(struct tcphdr) + PAYLOAD_LEN);
+
+	iph = (struct iphdr *)(optpkt + ETH_HLEN);
+	iph->ihl = 5 + (optlen / 4);
+	iph->tot_len = htons(ntohs(iph->tot_len) + optlen);
+	iph->check = 0;
+	iph->check = checksum_fold(iph, sizeof(struct iphdr) + optlen, 0);
+}
+
+/* IPv4 options shouldn't coalesce */
+static void send_ip_options(int fd, struct sockaddr_ll *daddr)
+{
+	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+	static char optpkt[sizeof(buf) + sizeof(struct ip_timestamp)];
+	int optlen = sizeof(struct ip_timestamp);
+	int pkt_size = total_hdr_len + PAYLOAD_LEN + optlen;
+
+	create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+	write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
+
+	create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0);
+	add_ipv4_ts_option(buf, optpkt);
+	write_packet(fd, optpkt, pkt_size, daddr);
+
+	create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+	write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
+}
+
+/*  IPv4 fragments shouldn't coalesce */
+static void send_fragment4(int fd, struct sockaddr_ll *daddr)
+{
+	static char buf[IP_MAXPACKET];
+	struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+	int pkt_size = total_hdr_len + PAYLOAD_LEN;
+
+	create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+	write_packet(fd, buf, pkt_size, daddr);
+
+	/* Once fragmented, packet would retain the total_len.
+	 * Tcp header is prepared as if rest of data is in follow-up frags,
+	 * but follow up frags aren't actually sent.
+	 */
+	memset(buf + total_hdr_len, 'a', PAYLOAD_LEN * 2);
+	fill_transportlayer(buf + tcp_offset, PAYLOAD_LEN, 0, PAYLOAD_LEN * 2, 0);
+	fill_networklayer(buf + ETH_HLEN, PAYLOAD_LEN);
+	fill_datalinklayer(buf);
+
+	iph->frag_off = htons(0x6000); // DF = 1, MF = 1
+	iph->check = 0;
+	iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+	write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* IPv4 packets with different ttl don't coalesce.*/
+static void send_changed_ttl(int fd, struct sockaddr_ll *daddr)
+{
+	int pkt_size = total_hdr_len + PAYLOAD_LEN;
+	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+	struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+
+	create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+	write_packet(fd, buf, pkt_size, daddr);
+
+	create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+	iph->ttl = 7;
+	iph->check = 0;
+	iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+	write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* Packets with different tos don't coalesce.*/
+static void send_changed_tos(int fd, struct sockaddr_ll *daddr)
+{
+	int pkt_size = total_hdr_len + PAYLOAD_LEN;
+	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+	struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+	struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
+
+	create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+	write_packet(fd, buf, pkt_size, daddr);
+
+	create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+	if (proto == PF_INET) {
+		iph->tos = 1;
+		iph->check = 0;
+		iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+	} else if (proto == PF_INET6) {
+		ip6h->priority = 0xf;
+	}
+	write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* Packets with different ECN don't coalesce.*/
+static void send_changed_ECN(int fd, struct sockaddr_ll *daddr)
+{
+	int pkt_size = total_hdr_len + PAYLOAD_LEN;
+	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+	struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+
+	create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+	write_packet(fd, buf, pkt_size, daddr);
+
+	create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+	if (proto == PF_INET) {
+		buf[ETH_HLEN + 1] ^= 0x2; // ECN set to 10
+		iph->check = 0;
+		iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+	} else {
+		buf[ETH_HLEN + 1] ^= 0x20; // ECN set to 10
+	}
+	write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* IPv6 fragments and packets with extensions don't coalesce.*/
+static void send_fragment6(int fd, struct sockaddr_ll *daddr)
+{
+	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+	static char extpkt[MAX_HDR_LEN + PAYLOAD_LEN +
+			   sizeof(struct ip6_frag)];
+	struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
+	struct ip6_frag *frag = (void *)(extpkt + tcp_offset);
+	int extlen = sizeof(struct ip6_frag);
+	int bufpkt_len = total_hdr_len + PAYLOAD_LEN;
+	int extpkt_len = bufpkt_len + extlen;
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		create_packet(buf, PAYLOAD_LEN * i, 0, PAYLOAD_LEN, 0);
+		write_packet(fd, buf, bufpkt_len, daddr);
+	}
+
+	create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+	memset(extpkt, 0, extpkt_len);
+
+	ip6h->nexthdr = IPPROTO_FRAGMENT;
+	ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen);
+	frag->ip6f_nxt = IPPROTO_TCP;
+
+	memcpy(extpkt, buf, tcp_offset);
+	memcpy(extpkt + tcp_offset + extlen, buf + tcp_offset,
+	       sizeof(struct tcphdr) + PAYLOAD_LEN);
+	write_packet(fd, extpkt, extpkt_len, daddr);
+
+	create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0);
+	write_packet(fd, buf, bufpkt_len, daddr);
+}
+
+static void bind_packetsocket(int fd)
+{
+	struct sockaddr_ll daddr = {};
+
+	daddr.sll_family = AF_PACKET;
+	daddr.sll_protocol = ethhdr_proto;
+	daddr.sll_ifindex = if_nametoindex(ifname);
+	if (daddr.sll_ifindex == 0)
+		error(1, errno, "if_nametoindex");
+
+	if (bind(fd, (void *)&daddr, sizeof(daddr)) < 0)
+		error(1, errno, "could not bind socket");
+}
+
+static void set_timeout(int fd)
+{
+	struct timeval timeout;
+
+	timeout.tv_sec = 120;
+	timeout.tv_usec = 0;
+	if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (char *)&timeout,
+		       sizeof(timeout)) < 0)
+		error(1, errno, "cannot set timeout, setsockopt failed");
+}
+
+static void check_recv_pkts(int fd, int *correct_payload,
+			    int correct_num_pkts)
+{
+	static char buffer[IP_MAXPACKET + ETH_HLEN + 1];
+	struct iphdr *iph = (struct iphdr *)(buffer + ETH_HLEN);
+	struct ipv6hdr *ip6h = (struct ipv6hdr *)(buffer + ETH_HLEN);
+	struct tcphdr *tcph;
+	bool bad_packet = false;
+	int tcp_ext_len = 0;
+	int ip_ext_len = 0;
+	int pkt_size = -1;
+	int data_len = 0;
+	int num_pkt = 0;
+	int i;
+
+	vlog("Expected {");
+	for (i = 0; i < correct_num_pkts; i++)
+		vlog("%d ", correct_payload[i]);
+	vlog("}, Total %d packets\nReceived {", correct_num_pkts);
+
+	while (1) {
+		pkt_size = recv(fd, buffer, IP_MAXPACKET + ETH_HLEN + 1, 0);
+		if (pkt_size < 0)
+			error(1, errno, "could not receive");
+
+		if (iph->version == 4)
+			ip_ext_len = (iph->ihl - 5) * 4;
+		else if (ip6h->version == 6 && ip6h->nexthdr != IPPROTO_TCP)
+			ip_ext_len = sizeof(struct ip6_frag);
+
+		tcph = (struct tcphdr *)(buffer + tcp_offset + ip_ext_len);
+
+		if (tcph->fin)
+			break;
+
+		tcp_ext_len = (tcph->doff - 5) * 4;
+		data_len = pkt_size - total_hdr_len - tcp_ext_len - ip_ext_len;
+		/* Min ethernet frame payload is 46(ETH_ZLEN - ETH_HLEN) by RFC 802.3.
+		 * Ipv4/tcp packets without at least 6 bytes of data will be padded.
+		 * Packet sockets are protocol agnostic, and will not trim the padding.
+		 */
+		if (pkt_size == ETH_ZLEN && iph->version == 4) {
+			data_len = ntohs(iph->tot_len)
+				- sizeof(struct tcphdr) - sizeof(struct iphdr);
+		}
+		vlog("%d ", data_len);
+		if (data_len != correct_payload[num_pkt]) {
+			vlog("[!=%d]", correct_payload[num_pkt]);
+			bad_packet = true;
+		}
+		num_pkt++;
+	}
+	vlog("}, Total %d packets.\n", num_pkt);
+	if (num_pkt != correct_num_pkts)
+		error(1, 0, "incorrect number of packets");
+	if (bad_packet)
+		error(1, 0, "incorrect packet geometry");
+
+	printf("Test succeeded\n\n");
+}
+
+static void gro_sender(void)
+{
+	static char fin_pkt[MAX_HDR_LEN];
+	struct sockaddr_ll daddr = {};
+	int txfd = -1;
+
+	txfd = socket(PF_PACKET, SOCK_RAW, IPPROTO_RAW);
+	if (txfd < 0)
+		error(1, errno, "socket creation");
+
+	memset(&daddr, 0, sizeof(daddr));
+	daddr.sll_ifindex = if_nametoindex(ifname);
+	if (daddr.sll_ifindex == 0)
+		error(1, errno, "if_nametoindex");
+	daddr.sll_family = AF_PACKET;
+	memcpy(daddr.sll_addr, dst_mac, ETH_ALEN);
+	daddr.sll_halen = ETH_ALEN;
+	create_packet(fin_pkt, PAYLOAD_LEN * 2, 0, 0, 1);
+
+	if (strcmp(testname, "data") == 0) {
+		send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+		send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN / 2);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+		send_data_pkts(txfd, &daddr, PAYLOAD_LEN / 2, PAYLOAD_LEN);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+	} else if (strcmp(testname, "ack") == 0) {
+		send_ack(txfd, &daddr);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+	} else if (strcmp(testname, "flags") == 0) {
+		send_flags(txfd, &daddr, 1, 0, 0, 0);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+		send_flags(txfd, &daddr, 0, 1, 0, 0);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+		send_flags(txfd, &daddr, 0, 0, 1, 0);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+		send_flags(txfd, &daddr, 0, 0, 0, 1);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+	} else if (strcmp(testname, "tcp") == 0) {
+		send_changed_checksum(txfd, &daddr);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+		send_changed_seq(txfd, &daddr);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+		send_changed_ts(txfd, &daddr);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+		send_diff_opt(txfd, &daddr);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+	} else if (strcmp(testname, "ip") == 0) {
+		send_changed_ECN(txfd, &daddr);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+		send_changed_tos(txfd, &daddr);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+		if (proto == PF_INET) {
+			/* Modified packets may be received out of order.
+			 * Sleep function added to enforce test boundaries
+			 * so that fin pkts are not received prior to other pkts.
+			 */
+			sleep(1);
+			send_changed_ttl(txfd, &daddr);
+			write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+			sleep(1);
+			send_ip_options(txfd, &daddr);
+			sleep(1);
+			write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+			sleep(1);
+			send_fragment4(txfd, &daddr);
+			sleep(1);
+			write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+		} else if (proto == PF_INET6) {
+			send_fragment6(txfd, &daddr);
+			write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+		}
+	} else if (strcmp(testname, "large") == 0) {
+		/* 20 is the difference between min iphdr size
+		 * and min ipv6hdr size. Like MAX_HDR_SIZE,
+		 * MAX_PAYLOAD is defined with the larger header of the two.
+		 */
+		int offset = proto == PF_INET ? 20 : 0;
+		int remainder = (MAX_PAYLOAD + offset) % MSS;
+
+		send_large(txfd, &daddr, remainder);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+		send_large(txfd, &daddr, remainder + 1);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+	} else {
+		error(1, 0, "Unknown testcase");
+	}
+
+	if (close(txfd))
+		error(1, errno, "socket close");
+}
+
+static void gro_receiver(void)
+{
+	static int correct_payload[NUM_PACKETS];
+	int rxfd = -1;
+
+	rxfd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_NONE));
+	if (rxfd < 0)
+		error(1, 0, "socket creation");
+	setup_sock_filter(rxfd);
+	set_timeout(rxfd);
+	bind_packetsocket(rxfd);
+
+	memset(correct_payload, 0, sizeof(correct_payload));
+
+	if (strcmp(testname, "data") == 0) {
+		printf("pure data packet of same size: ");
+		correct_payload[0] = PAYLOAD_LEN * 2;
+		check_recv_pkts(rxfd, correct_payload, 1);
+
+		printf("large data packets followed by a smaller one: ");
+		correct_payload[0] = PAYLOAD_LEN * 1.5;
+		check_recv_pkts(rxfd, correct_payload, 1);
+
+		printf("small data packets followed by a larger one: ");
+		correct_payload[0] = PAYLOAD_LEN / 2;
+		correct_payload[1] = PAYLOAD_LEN;
+		check_recv_pkts(rxfd, correct_payload, 2);
+	} else if (strcmp(testname, "ack") == 0) {
+		printf("duplicate ack and pure ack: ");
+		check_recv_pkts(rxfd, correct_payload, 3);
+	} else if (strcmp(testname, "flags") == 0) {
+		correct_payload[0] = PAYLOAD_LEN * 3;
+		correct_payload[1] = PAYLOAD_LEN * 2;
+
+		printf("psh flag ends coalescing: ");
+		check_recv_pkts(rxfd, correct_payload, 2);
+
+		correct_payload[0] = PAYLOAD_LEN * 2;
+		correct_payload[1] = 0;
+		correct_payload[2] = PAYLOAD_LEN * 2;
+		printf("syn flag ends coalescing: ");
+		check_recv_pkts(rxfd, correct_payload, 3);
+
+		printf("rst flag ends coalescing: ");
+		check_recv_pkts(rxfd, correct_payload, 3);
+
+		printf("urg flag ends coalescing: ");
+		check_recv_pkts(rxfd, correct_payload, 3);
+	} else if (strcmp(testname, "tcp") == 0) {
+		correct_payload[0] = PAYLOAD_LEN;
+		correct_payload[1] = PAYLOAD_LEN;
+		correct_payload[2] = PAYLOAD_LEN;
+		correct_payload[3] = PAYLOAD_LEN;
+
+		printf("changed checksum does not coalesce: ");
+		check_recv_pkts(rxfd, correct_payload, 2);
+
+		printf("Wrong Seq number doesn't coalesce: ");
+		check_recv_pkts(rxfd, correct_payload, 2);
+
+		printf("Different timestamp doesn't coalesce: ");
+		correct_payload[0] = PAYLOAD_LEN * 2;
+		check_recv_pkts(rxfd, correct_payload, 4);
+
+		printf("Different options doesn't coalesce: ");
+		correct_payload[0] = PAYLOAD_LEN * 2;
+		check_recv_pkts(rxfd, correct_payload, 2);
+	} else if (strcmp(testname, "ip") == 0) {
+		correct_payload[0] = PAYLOAD_LEN;
+		correct_payload[1] = PAYLOAD_LEN;
+
+		printf("different ECN doesn't coalesce: ");
+		check_recv_pkts(rxfd, correct_payload, 2);
+
+		printf("different tos doesn't coalesce: ");
+		check_recv_pkts(rxfd, correct_payload, 2);
+
+		if (proto == PF_INET) {
+			printf("different ttl doesn't coalesce: ");
+			check_recv_pkts(rxfd, correct_payload, 2);
+
+			printf("ip options doesn't coalesce: ");
+			correct_payload[2] = PAYLOAD_LEN;
+			check_recv_pkts(rxfd, correct_payload, 3);
+
+			printf("fragmented ip4 doesn't coalesce: ");
+			check_recv_pkts(rxfd, correct_payload, 2);
+		} else if (proto == PF_INET6) {
+			/* GRO doesn't check for ipv6 hop limit when flushing.
+			 * Hence no corresponding test to the ipv4 case.
+			 */
+			printf("fragmented ip6 doesn't coalesce: ");
+			correct_payload[0] = PAYLOAD_LEN * 2;
+			check_recv_pkts(rxfd, correct_payload, 2);
+		}
+	} else if (strcmp(testname, "large") == 0) {
+		int offset = proto == PF_INET ? 20 : 0;
+		int remainder = (MAX_PAYLOAD + offset) % MSS;
+
+		correct_payload[0] = (MAX_PAYLOAD + offset);
+		correct_payload[1] = remainder;
+		printf("Shouldn't coalesce if exceed IP max pkt size: ");
+		check_recv_pkts(rxfd, correct_payload, 2);
+
+		/* last segment sent individually, doesn't start new segment */
+		correct_payload[0] = correct_payload[0] - remainder;
+		correct_payload[1] = remainder + 1;
+		correct_payload[2] = remainder + 1;
+		check_recv_pkts(rxfd, correct_payload, 3);
+	} else {
+		error(1, 0, "Test case error, should never trigger");
+	}
+
+	if (close(rxfd))
+		error(1, 0, "socket close");
+}
+
+static void parse_args(int argc, char **argv)
+{
+	static const struct option opts[] = {
+		{ "dmac", required_argument, NULL, 'D' },
+		{ "iface", required_argument, NULL, 'i' },
+		{ "ipv4", no_argument, NULL, '4' },
+		{ "ipv6", no_argument, NULL, '6' },
+		{ "rx", no_argument, NULL, 'r' },
+		{ "smac", required_argument, NULL, 'S' },
+		{ "test", required_argument, NULL, 't' },
+		{ "verbose", no_argument, NULL, 'v' },
+		{ 0, 0, 0, 0 }
+	};
+	int c;
+
+	while ((c = getopt_long(argc, argv, "46D:i:rS:t:v", opts, NULL)) != -1) {
+		switch (c) {
+		case '4':
+			proto = PF_INET;
+			ethhdr_proto = htons(ETH_P_IP);
+			break;
+		case '6':
+			proto = PF_INET6;
+			ethhdr_proto = htons(ETH_P_IPV6);
+			break;
+		case 'D':
+			dmac = optarg;
+			break;
+		case 'i':
+			ifname = optarg;
+			break;
+		case 'r':
+			tx_socket = false;
+			break;
+		case 'S':
+			smac = optarg;
+			break;
+		case 't':
+			testname = optarg;
+			break;
+		case 'v':
+			verbose = true;
+			break;
+		default:
+			error(1, 0, "%s invalid option %c\n", __func__, c);
+			break;
+		}
+	}
+}
+
+int main(int argc, char **argv)
+{
+	parse_args(argc, argv);
+
+	if (proto == PF_INET) {
+		tcp_offset = ETH_HLEN + sizeof(struct iphdr);
+		total_hdr_len = tcp_offset + sizeof(struct tcphdr);
+	} else if (proto == PF_INET6) {
+		tcp_offset = ETH_HLEN + sizeof(struct ipv6hdr);
+		total_hdr_len = MAX_HDR_LEN;
+	} else {
+		error(1, 0, "Protocol family is not ipv4 or ipv6");
+	}
+
+	read_MAC(src_mac, smac);
+	read_MAC(dst_mac, dmac);
+
+	if (tx_socket)
+		gro_sender();
+	else
+		gro_receiver();
+	return 0;
+}
diff --git a/tools/testing/selftests/net/gro.sh b/tools/testing/selftests/net/gro.sh
new file mode 100755
index 000000000000..342ad27f631b
--- /dev/null
+++ b/tools/testing/selftests/net/gro.sh
@@ -0,0 +1,99 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+readonly SERVER_MAC="aa:00:00:00:00:02"
+readonly CLIENT_MAC="aa:00:00:00:00:01"
+readonly TESTS=("data" "ack" "flags" "tcp" "ip" "large")
+readonly PROTOS=("ipv4" "ipv6")
+dev=""
+test="all"
+proto="ipv4"
+
+run_test() {
+  local server_pid=0
+  local exit_code=0
+  local protocol=$1
+  local test=$2
+  local ARGS=( "--${protocol}" "--dmac" "${SERVER_MAC}" \
+  "--smac" "${CLIENT_MAC}" "--test" "${test}" "--verbose" )
+
+  setup_ns
+  # Each test is run 3 times to deflake, because given the receive timing,
+  # not all packets that should coalesce will be considered in the same flow
+  # on every try.
+  for tries in {1..3}; do
+    # Actual test starts here
+    ip netns exec server_ns ./gro "${ARGS[@]}" "--rx" "--iface" "server" \
+      1>>log.txt &
+    server_pid=$!
+    sleep 0.5  # to allow for socket init
+    ip netns exec client_ns ./gro "${ARGS[@]}" "--iface" "client" \
+      1>>log.txt
+    wait "${server_pid}"
+    exit_code=$?
+    if [[ "${exit_code}" -eq 0 ]]; then
+        break;
+    fi
+  done
+  cleanup_ns
+  echo ${exit_code}
+}
+
+run_all_tests() {
+  local failed_tests=()
+  for proto in "${PROTOS[@]}"; do
+    for test in "${TESTS[@]}"; do
+      echo "running test ${proto} ${test}" >&2
+      exit_code=$(run_test $proto $test)
+      if [[ "${exit_code}" -ne 0 ]]; then
+        failed_tests+=("${proto}_${test}")
+      fi;
+    done;
+  done
+  if [[ ${#failed_tests[@]} -ne 0 ]]; then
+    echo "failed tests: ${failed_tests[*]}. \
+    Please see log.txt for more logs"
+    exit 1
+  else
+    echo "All Tests Succeeded!"
+  fi;
+}
+
+usage() {
+  echo "Usage: $0 \
+  [-i <DEV>] \
+  [-t data|ack|flags|tcp|ip|large] \
+  [-p <ipv4|ipv6>]" 1>&2;
+  exit 1;
+}
+
+while getopts "i:t:p:" opt; do
+  case "${opt}" in
+    i)
+      dev="${OPTARG}"
+      ;;
+    t)
+      test="${OPTARG}"
+      ;;
+    p)
+      proto="${OPTARG}"
+      ;;
+    *)
+      usage
+      ;;
+  esac
+done
+
+if [ -n "$dev" ]; then
+	source setup_loopback.sh
+else
+	source setup_veth.sh
+fi
+
+setup
+trap cleanup EXIT
+if [[ "${test}" == "all" ]]; then
+  run_all_tests
+else
+  run_test "${proto}" "${test}"
+fi;
diff --git a/tools/testing/selftests/net/ioam6.sh b/tools/testing/selftests/net/ioam6.sh
new file mode 100755
index 000000000000..3caf72bb9c6a
--- /dev/null
+++ b/tools/testing/selftests/net/ioam6.sh
@@ -0,0 +1,652 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Author: Justin Iurman <justin.iurman@uliege.be>
+#
+# This script evaluates the IOAM insertion for IPv6 by checking the IOAM data
+# consistency directly inside packets on the receiver side. Tests are divided
+# into three categories: OUTPUT (evaluates the IOAM processing by the sender),
+# INPUT (evaluates the IOAM processing by the receiver) and GLOBAL (evaluates
+# wider use cases that do not fall into the other two categories). Both OUTPUT
+# and INPUT tests only use a two-node topology (alpha and beta), while GLOBAL
+# tests use the entire three-node topology (alpha, beta, gamma). Each test is
+# documented inside its own handler in the code below.
+#
+# An IOAM domain is configured from Alpha to Gamma but not on the reverse path.
+# When either Beta or Gamma is the destination (depending on the test category),
+# Alpha adds an IOAM option (Pre-allocated Trace) inside a Hop-by-hop.
+#
+#
+#            +-------------------+            +-------------------+
+#            |                   |            |                   |
+#            |    Alpha netns    |            |    Gamma netns    |
+#            |                   |            |                   |
+#            |  +-------------+  |            |  +-------------+  |
+#            |  |    veth0    |  |            |  |    veth0    |  |
+#            |  |  db01::2/64 |  |            |  |  db02::2/64 |  |
+#            |  +-------------+  |            |  +-------------+  |
+#            |         .         |            |         .         |
+#            +-------------------+            +-------------------+
+#                      .                                .
+#                      .                                .
+#                      .                                .
+#            +----------------------------------------------------+
+#            |         .                                .         |
+#            |  +-------------+                  +-------------+  |
+#            |  |    veth0    |                  |    veth1    |  |
+#            |  |  db01::1/64 | ................ |  db02::1/64 |  |
+#            |  +-------------+                  +-------------+  |
+#            |                                                    |
+#            |                      Beta netns                    |
+#            |                                                    |
+#            +----------------------------------------------------+
+#
+#
+#
+#        =============================================================
+#        |                Alpha - IOAM configuration                 |
+#        +===========================================================+
+#        | Node ID             | 1                                   |
+#        +-----------------------------------------------------------+
+#        | Node Wide ID        | 11111111                            |
+#        +-----------------------------------------------------------+
+#        | Ingress ID          | 0xffff (default value)              |
+#        +-----------------------------------------------------------+
+#        | Ingress Wide ID     | 0xffffffff (default value)          |
+#        +-----------------------------------------------------------+
+#        | Egress ID           | 101                                 |
+#        +-----------------------------------------------------------+
+#        | Egress Wide ID      | 101101                              |
+#        +-----------------------------------------------------------+
+#        | Namespace Data      | 0xdeadbee0                          |
+#        +-----------------------------------------------------------+
+#        | Namespace Wide Data | 0xcafec0caf00dc0de                  |
+#        +-----------------------------------------------------------+
+#        | Schema ID           | 777                                 |
+#        +-----------------------------------------------------------+
+#        | Schema Data         | something that will be 4n-aligned   |
+#        +-----------------------------------------------------------+
+#
+#
+#        =============================================================
+#        |                 Beta - IOAM configuration                 |
+#        +===========================================================+
+#        | Node ID             | 2                                   |
+#        +-----------------------------------------------------------+
+#        | Node Wide ID        | 22222222                            |
+#        +-----------------------------------------------------------+
+#        | Ingress ID          | 201                                 |
+#        +-----------------------------------------------------------+
+#        | Ingress Wide ID     | 201201                              |
+#        +-----------------------------------------------------------+
+#        | Egress ID           | 202                                 |
+#        +-----------------------------------------------------------+
+#        | Egress Wide ID      | 202202                              |
+#        +-----------------------------------------------------------+
+#        | Namespace Data      | 0xdeadbee1                          |
+#        +-----------------------------------------------------------+
+#        | Namespace Wide Data | 0xcafec0caf11dc0de                  |
+#        +-----------------------------------------------------------+
+#        | Schema ID           | 666                                 |
+#        +-----------------------------------------------------------+
+#        | Schema Data         | Hello there -Obi                    |
+#        +-----------------------------------------------------------+
+#
+#
+#        =============================================================
+#        |                Gamma - IOAM configuration                 |
+#        +===========================================================+
+#        | Node ID             | 3                                   |
+#        +-----------------------------------------------------------+
+#        | Node Wide ID        | 33333333                            |
+#        +-----------------------------------------------------------+
+#        | Ingress ID          | 301                                 |
+#        +-----------------------------------------------------------+
+#        | Ingress Wide ID     | 301301                              |
+#        +-----------------------------------------------------------+
+#        | Egress ID           | 0xffff (default value)              |
+#        +-----------------------------------------------------------+
+#        | Egress Wide ID      | 0xffffffff (default value)          |
+#        +-----------------------------------------------------------+
+#        | Namespace Data      | 0xdeadbee2                          |
+#        +-----------------------------------------------------------+
+#        | Namespace Wide Data | 0xcafec0caf22dc0de                  |
+#        +-----------------------------------------------------------+
+#        | Schema ID           | 0xffffff (= None)                   |
+#        +-----------------------------------------------------------+
+#        | Schema Data         |                                     |
+#        +-----------------------------------------------------------+
+
+
+################################################################################
+#                                                                              #
+# WARNING: Be careful if you modify the block below - it MUST be kept          #
+#          synchronized with configurations inside ioam6_parser.c and always   #
+#          reflect the same.                                                   #
+#                                                                              #
+################################################################################
+
+ALPHA=(
+	1					# ID
+	11111111				# Wide ID
+	0xffff					# Ingress ID
+	0xffffffff				# Ingress Wide ID
+	101					# Egress ID
+	101101					# Egress Wide ID
+	0xdeadbee0				# Namespace Data
+	0xcafec0caf00dc0de			# Namespace Wide Data
+	777					# Schema ID (0xffffff = None)
+	"something that will be 4n-aligned"	# Schema Data
+)
+
+BETA=(
+	2
+	22222222
+	201
+	201201
+	202
+	202202
+	0xdeadbee1
+	0xcafec0caf11dc0de
+	666
+	"Hello there -Obi"
+)
+
+GAMMA=(
+	3
+	33333333
+	301
+	301301
+	0xffff
+	0xffffffff
+	0xdeadbee2
+	0xcafec0caf22dc0de
+	0xffffff
+	""
+)
+
+TESTS_OUTPUT="
+	out_undef_ns
+	out_no_room
+	out_bits
+	out_full_supp_trace
+"
+
+TESTS_INPUT="
+	in_undef_ns
+	in_no_room
+	in_oflag
+	in_bits
+	in_full_supp_trace
+"
+
+TESTS_GLOBAL="
+	fwd_full_supp_trace
+"
+
+
+################################################################################
+#                                                                              #
+#                                   LIBRARY                                    #
+#                                                                              #
+################################################################################
+
+check_kernel_compatibility()
+{
+  ip netns add ioam-tmp-node
+  ip link add name veth0 netns ioam-tmp-node type veth \
+         peer name veth1 netns ioam-tmp-node
+
+  ip -netns ioam-tmp-node link set veth0 up
+  ip -netns ioam-tmp-node link set veth1 up
+
+  ip -netns ioam-tmp-node ioam namespace add 0 &>/dev/null
+  ns_ad=$?
+
+  ip -netns ioam-tmp-node ioam namespace show | grep -q "namespace 0"
+  ns_sh=$?
+
+  if [[ $ns_ad != 0 || $ns_sh != 0 ]]
+  then
+    echo "SKIP: kernel version probably too old, missing ioam support"
+    ip link del veth0 2>/dev/null || true
+    ip netns del ioam-tmp-node || true
+    exit 1
+  fi
+
+  ip -netns ioam-tmp-node route add db02::/64 encap ioam6 trace prealloc \
+         type 0x800000 ns 0 size 4 dev veth0 &>/dev/null
+  tr_ad=$?
+
+  ip -netns ioam-tmp-node -6 route | grep -q "encap ioam6 trace"
+  tr_sh=$?
+
+  if [[ $tr_ad != 0 || $tr_sh != 0 ]]
+  then
+    echo "SKIP: cannot attach an ioam trace to a route, did you compile" \
+         "without CONFIG_IPV6_IOAM6_LWTUNNEL?"
+    ip link del veth0 2>/dev/null || true
+    ip netns del ioam-tmp-node || true
+    exit 1
+  fi
+
+  ip link del veth0 2>/dev/null || true
+  ip netns del ioam-tmp-node || true
+}
+
+cleanup()
+{
+  ip link del ioam-veth-alpha 2>/dev/null || true
+  ip link del ioam-veth-gamma 2>/dev/null || true
+
+  ip netns del ioam-node-alpha || true
+  ip netns del ioam-node-beta || true
+  ip netns del ioam-node-gamma || true
+}
+
+setup()
+{
+  ip netns add ioam-node-alpha
+  ip netns add ioam-node-beta
+  ip netns add ioam-node-gamma
+
+  ip link add name ioam-veth-alpha netns ioam-node-alpha type veth \
+         peer name ioam-veth-betaL netns ioam-node-beta
+  ip link add name ioam-veth-betaR netns ioam-node-beta type veth \
+         peer name ioam-veth-gamma netns ioam-node-gamma
+
+  ip -netns ioam-node-alpha link set ioam-veth-alpha name veth0
+  ip -netns ioam-node-beta link set ioam-veth-betaL name veth0
+  ip -netns ioam-node-beta link set ioam-veth-betaR name veth1
+  ip -netns ioam-node-gamma link set ioam-veth-gamma name veth0
+
+  ip -netns ioam-node-alpha addr add db01::2/64 dev veth0
+  ip -netns ioam-node-alpha link set veth0 up
+  ip -netns ioam-node-alpha link set lo up
+  ip -netns ioam-node-alpha route add db02::/64 via db01::1 dev veth0
+  ip -netns ioam-node-alpha route del db01::/64
+  ip -netns ioam-node-alpha route add db01::/64 dev veth0
+
+  ip -netns ioam-node-beta addr add db01::1/64 dev veth0
+  ip -netns ioam-node-beta addr add db02::1/64 dev veth1
+  ip -netns ioam-node-beta link set veth0 up
+  ip -netns ioam-node-beta link set veth1 up
+  ip -netns ioam-node-beta link set lo up
+
+  ip -netns ioam-node-gamma addr add db02::2/64 dev veth0
+  ip -netns ioam-node-gamma link set veth0 up
+  ip -netns ioam-node-gamma link set lo up
+  ip -netns ioam-node-gamma route add db01::/64 via db02::1 dev veth0
+
+  # - IOAM config -
+  ip netns exec ioam-node-alpha sysctl -wq net.ipv6.ioam6_id=${ALPHA[0]}
+  ip netns exec ioam-node-alpha sysctl -wq net.ipv6.ioam6_id_wide=${ALPHA[1]}
+  ip netns exec ioam-node-alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id=${ALPHA[4]}
+  ip netns exec ioam-node-alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${ALPHA[5]}
+  ip -netns ioam-node-alpha ioam namespace add 123 data ${ALPHA[6]} wide ${ALPHA[7]}
+  ip -netns ioam-node-alpha ioam schema add ${ALPHA[8]} "${ALPHA[9]}"
+  ip -netns ioam-node-alpha ioam namespace set 123 schema ${ALPHA[8]}
+
+  ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.all.forwarding=1
+  ip netns exec ioam-node-beta sysctl -wq net.ipv6.ioam6_id=${BETA[0]}
+  ip netns exec ioam-node-beta sysctl -wq net.ipv6.ioam6_id_wide=${BETA[1]}
+  ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1
+  ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_id=${BETA[2]}
+  ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${BETA[3]}
+  ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth1.ioam6_id=${BETA[4]}
+  ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth1.ioam6_id_wide=${BETA[5]}
+  ip -netns ioam-node-beta ioam namespace add 123 data ${BETA[6]} wide ${BETA[7]}
+  ip -netns ioam-node-beta ioam schema add ${BETA[8]} "${BETA[9]}"
+  ip -netns ioam-node-beta ioam namespace set 123 schema ${BETA[8]}
+
+  ip netns exec ioam-node-gamma sysctl -wq net.ipv6.ioam6_id=${GAMMA[0]}
+  ip netns exec ioam-node-gamma sysctl -wq net.ipv6.ioam6_id_wide=${GAMMA[1]}
+  ip netns exec ioam-node-gamma sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1
+  ip netns exec ioam-node-gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id=${GAMMA[2]}
+  ip netns exec ioam-node-gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${GAMMA[3]}
+  ip -netns ioam-node-gamma ioam namespace add 123 data ${GAMMA[6]} wide ${GAMMA[7]}
+
+  sleep 1
+
+  ip netns exec ioam-node-alpha ping6 -c 5 -W 1 db02::2 &>/dev/null
+  if [ $? != 0 ]
+  then
+    echo "Setup FAILED"
+    cleanup &>/dev/null
+    exit 0
+  fi
+}
+
+log_test_passed()
+{
+  local desc=$1
+  printf "TEST: %-60s  [ OK ]\n" "${desc}"
+}
+
+log_test_failed()
+{
+  local desc=$1
+  printf "TEST: %-60s  [FAIL]\n" "${desc}"
+}
+
+run_test()
+{
+  local name=$1
+  local desc=$2
+  local node_src=$3
+  local node_dst=$4
+  local ip6_src=$5
+  local ip6_dst=$6
+  local if_dst=$7
+  local trace_type=$8
+  local ioam_ns=$9
+
+  ip netns exec $node_dst ./ioam6_parser $if_dst $name $ip6_src $ip6_dst \
+         $trace_type $ioam_ns &
+  local spid=$!
+  sleep 0.1
+
+  ip netns exec $node_src ping6 -t 64 -c 1 -W 1 $ip6_dst &>/dev/null
+  if [ $? != 0 ]
+  then
+    log_test_failed "${desc}"
+    kill -2 $spid &>/dev/null
+  else
+    wait $spid
+    [ $? = 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}"
+  fi
+}
+
+run()
+{
+  echo
+  echo "OUTPUT tests"
+  printf "%0.s-" {1..74}
+  echo
+
+  # set OUTPUT settings
+  ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=0
+
+  for t in $TESTS_OUTPUT
+  do
+    $t
+  done
+
+  # clean OUTPUT settings
+  ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1
+  ip -netns ioam-node-alpha route change db01::/64 dev veth0
+
+
+  echo
+  echo "INPUT tests"
+  printf "%0.s-" {1..74}
+  echo
+
+  # set INPUT settings
+  ip -netns ioam-node-alpha ioam namespace del 123
+
+  for t in $TESTS_INPUT
+  do
+    $t
+  done
+
+  # clean INPUT settings
+  ip -netns ioam-node-alpha ioam namespace add 123 \
+         data ${ALPHA[6]} wide ${ALPHA[7]}
+  ip -netns ioam-node-alpha ioam namespace set 123 schema ${ALPHA[8]}
+  ip -netns ioam-node-alpha route change db01::/64 dev veth0
+
+
+  echo
+  echo "GLOBAL tests"
+  printf "%0.s-" {1..74}
+  echo
+
+  for t in $TESTS_GLOBAL
+  do
+    $t
+  done
+}
+
+bit2type=(
+  0x800000 0x400000 0x200000 0x100000 0x080000 0x040000 0x020000 0x010000
+  0x008000 0x004000 0x002000 0x001000 0x000800 0x000400 0x000200 0x000100
+  0x000080 0x000040 0x000020 0x000010 0x000008 0x000004 0x000002
+)
+bit2size=( 4 4 4 4 4 4 4 4 8 8 8 4 4 4 4 4 4 4 4 4 4 4 4 )
+
+
+################################################################################
+#                                                                              #
+#                              OUTPUT tests                                    #
+#                                                                              #
+#   Two nodes (sender/receiver), IOAM disabled on ingress for the receiver.    #
+################################################################################
+
+out_undef_ns()
+{
+  ##############################################################################
+  # Make sure that the encap node won't fill the trace if the chosen IOAM      #
+  # namespace is not configured locally.                                       #
+  ##############################################################################
+  local desc="Unknown IOAM namespace"
+
+  ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
+         type 0x800000 ns 0 size 4 dev veth0
+
+  run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
+         db01::1 veth0 0x800000 0
+}
+
+out_no_room()
+{
+  ##############################################################################
+  # Make sure that the encap node won't fill the trace and will set the        #
+  # Overflow flag since there is no room enough for its data.                  #
+  ##############################################################################
+  local desc="Missing trace room"
+
+  ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
+         type 0xc00000 ns 123 size 4 dev veth0
+
+  run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
+         db01::1 veth0 0xc00000 123
+}
+
+out_bits()
+{
+  ##############################################################################
+  # Make sure that, for each trace type bit, the encap node will either:       #
+  #  (i)  fill the trace with its data when it is a supported bit              #
+  #  (ii) not fill the trace with its data when it is an unsupported bit       #
+  ##############################################################################
+  local desc="Trace type with bit <n> only"
+
+  local tmp=${bit2size[22]}
+  bit2size[22]=$(( $tmp + ${#ALPHA[9]} + ((4 - (${#ALPHA[9]} % 4)) % 4) ))
+
+  for i in {0..22}
+  do
+    ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace \
+           prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} dev veth0
+
+    run_test "out_bit$i" "${desc/<n>/$i}" ioam-node-alpha ioam-node-beta \
+           db01::2 db01::1 veth0 ${bit2type[$i]} 123
+  done
+
+  bit2size[22]=$tmp
+}
+
+out_full_supp_trace()
+{
+  ##############################################################################
+  # Make sure that the encap node will correctly fill a full trace. Be careful,#
+  # "full trace" here does NOT mean all bits (only supported ones).            #
+  ##############################################################################
+  local desc="Full supported trace"
+
+  ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
+         type 0xfff002 ns 123 size 100 dev veth0
+
+  run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
+         db01::1 veth0 0xfff002 123
+}
+
+
+################################################################################
+#                                                                              #
+#                               INPUT tests                                    #
+#                                                                              #
+#     Two nodes (sender/receiver), the sender MUST NOT fill the trace upon     #
+#     insertion -> the IOAM namespace configured on the sender is removed      #
+#     and is used in the inserted trace to force the sender not to fill it.    #
+################################################################################
+
+in_undef_ns()
+{
+  ##############################################################################
+  # Make sure that the receiving node won't fill the trace if the related IOAM #
+  # namespace is not configured locally.                                       #
+  ##############################################################################
+  local desc="Unknown IOAM namespace"
+
+  ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
+         type 0x800000 ns 0 size 4 dev veth0
+
+  run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
+         db01::1 veth0 0x800000 0
+}
+
+in_no_room()
+{
+  ##############################################################################
+  # Make sure that the receiving node won't fill the trace and will set the    #
+  # Overflow flag if there is no room enough for its data.                     #
+  ##############################################################################
+  local desc="Missing trace room"
+
+  ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
+         type 0xc00000 ns 123 size 4 dev veth0
+
+  run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
+         db01::1 veth0 0xc00000 123
+}
+
+in_bits()
+{
+  ##############################################################################
+  # Make sure that, for each trace type bit, the receiving node will either:   #
+  #  (i)  fill the trace with its data when it is a supported bit              #
+  #  (ii) not fill the trace with its data when it is an unsupported bit       #
+  ##############################################################################
+  local desc="Trace type with bit <n> only"
+
+  local tmp=${bit2size[22]}
+  bit2size[22]=$(( $tmp + ${#BETA[9]} + ((4 - (${#BETA[9]} % 4)) % 4) ))
+
+  for i in {0..22}
+  do
+    ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace \
+           prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} dev veth0
+
+    run_test "in_bit$i" "${desc/<n>/$i}" ioam-node-alpha ioam-node-beta \
+           db01::2 db01::1 veth0 ${bit2type[$i]} 123
+  done
+
+  bit2size[22]=$tmp
+}
+
+in_oflag()
+{
+  ##############################################################################
+  # Make sure that the receiving node won't fill the trace since the Overflow  #
+  # flag is set.                                                               #
+  ##############################################################################
+  local desc="Overflow flag is set"
+
+  # Exception:
+  #   Here, we need the sender to set the Overflow flag. For that, we will add
+  #   back the IOAM namespace that was previously configured on the sender.
+  ip -netns ioam-node-alpha ioam namespace add 123
+
+  ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
+         type 0xc00000 ns 123 size 4 dev veth0
+
+  run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
+         db01::1 veth0 0xc00000 123
+
+  # And we clean the exception for this test to get things back to normal for
+  # other INPUT tests
+  ip -netns ioam-node-alpha ioam namespace del 123
+}
+
+in_full_supp_trace()
+{
+  ##############################################################################
+  # Make sure that the receiving node will correctly fill a full trace. Be     #
+  # careful, "full trace" here does NOT mean all bits (only supported ones).   #
+  ##############################################################################
+  local desc="Full supported trace"
+
+  ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
+         type 0xfff002 ns 123 size 80 dev veth0
+
+  run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
+         db01::1 veth0 0xfff002 123
+}
+
+
+################################################################################
+#                                                                              #
+#                              GLOBAL tests                                    #
+#                                                                              #
+#   Three nodes (sender/router/receiver), IOAM fully enabled on every node.    #
+################################################################################
+
+fwd_full_supp_trace()
+{
+  ##############################################################################
+  # Make sure that all three nodes correctly filled the full supported trace   #
+  # by checking that the trace data is consistent with the predefined config.  #
+  ##############################################################################
+  local desc="Forward - Full supported trace"
+
+  ip -netns ioam-node-alpha route change db02::/64 encap ioam6 trace prealloc \
+         type 0xfff002 ns 123 size 244 via db01::1 dev veth0
+
+  run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-gamma db01::2 \
+         db02::2 veth0 0xfff002 123
+}
+
+
+################################################################################
+#                                                                              #
+#                                     MAIN                                     #
+#                                                                              #
+################################################################################
+
+if [ "$(id -u)" -ne 0 ]
+then
+  echo "SKIP: Need root privileges"
+  exit 1
+fi
+
+if [ ! -x "$(command -v ip)" ]
+then
+  echo "SKIP: Could not run test without ip tool"
+  exit 1
+fi
+
+ip ioam &>/dev/null
+if [ $? = 1 ]
+then
+  echo "SKIP: iproute2 too old, missing ioam command"
+  exit 1
+fi
+
+check_kernel_compatibility
+
+cleanup &>/dev/null
+setup
+run
+cleanup &>/dev/null
diff --git a/tools/testing/selftests/net/ioam6_parser.c b/tools/testing/selftests/net/ioam6_parser.c
new file mode 100644
index 000000000000..d376cb2c383c
--- /dev/null
+++ b/tools/testing/selftests/net/ioam6_parser.c
@@ -0,0 +1,720 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Author: Justin Iurman (justin.iurman@uliege.be)
+ *
+ * IOAM tester for IPv6, see ioam6.sh for details on each test case.
+ */
+#include <arpa/inet.h>
+#include <errno.h>
+#include <limits.h>
+#include <linux/const.h>
+#include <linux/if_ether.h>
+#include <linux/ioam6.h>
+#include <linux/ipv6.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+struct ioam_config {
+	__u32 id;
+	__u64 wide;
+	__u16 ingr_id;
+	__u16 egr_id;
+	__u32 ingr_wide;
+	__u32 egr_wide;
+	__u32 ns_data;
+	__u64 ns_wide;
+	__u32 sc_id;
+	__u8 hlim;
+	char *sc_data;
+};
+
+/*
+ * Be careful if you modify structs below - everything MUST be kept synchronized
+ * with configurations inside ioam6.sh and always reflect the same.
+ */
+
+static struct ioam_config node1 = {
+	.id = 1,
+	.wide = 11111111,
+	.ingr_id = 0xffff, /* default value */
+	.egr_id = 101,
+	.ingr_wide = 0xffffffff, /* default value */
+	.egr_wide = 101101,
+	.ns_data = 0xdeadbee0,
+	.ns_wide = 0xcafec0caf00dc0de,
+	.sc_id = 777,
+	.sc_data = "something that will be 4n-aligned",
+	.hlim = 64,
+};
+
+static struct ioam_config node2 = {
+	.id = 2,
+	.wide = 22222222,
+	.ingr_id = 201,
+	.egr_id = 202,
+	.ingr_wide = 201201,
+	.egr_wide = 202202,
+	.ns_data = 0xdeadbee1,
+	.ns_wide = 0xcafec0caf11dc0de,
+	.sc_id = 666,
+	.sc_data = "Hello there -Obi",
+	.hlim = 63,
+};
+
+static struct ioam_config node3 = {
+	.id = 3,
+	.wide = 33333333,
+	.ingr_id = 301,
+	.egr_id = 0xffff, /* default value */
+	.ingr_wide = 301301,
+	.egr_wide = 0xffffffff, /* default value */
+	.ns_data = 0xdeadbee2,
+	.ns_wide = 0xcafec0caf22dc0de,
+	.sc_id = 0xffffff, /* default value */
+	.sc_data = NULL,
+	.hlim = 62,
+};
+
+enum {
+	/**********
+	 * OUTPUT *
+	 **********/
+	TEST_OUT_UNDEF_NS,
+	TEST_OUT_NO_ROOM,
+	TEST_OUT_BIT0,
+	TEST_OUT_BIT1,
+	TEST_OUT_BIT2,
+	TEST_OUT_BIT3,
+	TEST_OUT_BIT4,
+	TEST_OUT_BIT5,
+	TEST_OUT_BIT6,
+	TEST_OUT_BIT7,
+	TEST_OUT_BIT8,
+	TEST_OUT_BIT9,
+	TEST_OUT_BIT10,
+	TEST_OUT_BIT11,
+	TEST_OUT_BIT12,
+	TEST_OUT_BIT13,
+	TEST_OUT_BIT14,
+	TEST_OUT_BIT15,
+	TEST_OUT_BIT16,
+	TEST_OUT_BIT17,
+	TEST_OUT_BIT18,
+	TEST_OUT_BIT19,
+	TEST_OUT_BIT20,
+	TEST_OUT_BIT21,
+	TEST_OUT_BIT22,
+	TEST_OUT_FULL_SUPP_TRACE,
+
+	/*********
+	 * INPUT *
+	 *********/
+	TEST_IN_UNDEF_NS,
+	TEST_IN_NO_ROOM,
+	TEST_IN_OFLAG,
+	TEST_IN_BIT0,
+	TEST_IN_BIT1,
+	TEST_IN_BIT2,
+	TEST_IN_BIT3,
+	TEST_IN_BIT4,
+	TEST_IN_BIT5,
+	TEST_IN_BIT6,
+	TEST_IN_BIT7,
+	TEST_IN_BIT8,
+	TEST_IN_BIT9,
+	TEST_IN_BIT10,
+	TEST_IN_BIT11,
+	TEST_IN_BIT12,
+	TEST_IN_BIT13,
+	TEST_IN_BIT14,
+	TEST_IN_BIT15,
+	TEST_IN_BIT16,
+	TEST_IN_BIT17,
+	TEST_IN_BIT18,
+	TEST_IN_BIT19,
+	TEST_IN_BIT20,
+	TEST_IN_BIT21,
+	TEST_IN_BIT22,
+	TEST_IN_FULL_SUPP_TRACE,
+
+	/**********
+	 * GLOBAL *
+	 **********/
+	TEST_FWD_FULL_SUPP_TRACE,
+
+	__TEST_MAX,
+};
+
+static int check_ioam_header(int tid, struct ioam6_trace_hdr *ioam6h,
+			     __u32 trace_type, __u16 ioam_ns)
+{
+	if (__be16_to_cpu(ioam6h->namespace_id) != ioam_ns ||
+	    __be32_to_cpu(ioam6h->type_be32) != (trace_type << 8))
+		return 1;
+
+	switch (tid) {
+	case TEST_OUT_UNDEF_NS:
+	case TEST_IN_UNDEF_NS:
+		return ioam6h->overflow ||
+		       ioam6h->nodelen != 1 ||
+		       ioam6h->remlen != 1;
+
+	case TEST_OUT_NO_ROOM:
+	case TEST_IN_NO_ROOM:
+	case TEST_IN_OFLAG:
+		return !ioam6h->overflow ||
+		       ioam6h->nodelen != 2 ||
+		       ioam6h->remlen != 1;
+
+	case TEST_OUT_BIT0:
+	case TEST_IN_BIT0:
+	case TEST_OUT_BIT1:
+	case TEST_IN_BIT1:
+	case TEST_OUT_BIT2:
+	case TEST_IN_BIT2:
+	case TEST_OUT_BIT3:
+	case TEST_IN_BIT3:
+	case TEST_OUT_BIT4:
+	case TEST_IN_BIT4:
+	case TEST_OUT_BIT5:
+	case TEST_IN_BIT5:
+	case TEST_OUT_BIT6:
+	case TEST_IN_BIT6:
+	case TEST_OUT_BIT7:
+	case TEST_IN_BIT7:
+	case TEST_OUT_BIT11:
+	case TEST_IN_BIT11:
+		return ioam6h->overflow ||
+		       ioam6h->nodelen != 1 ||
+		       ioam6h->remlen;
+
+	case TEST_OUT_BIT8:
+	case TEST_IN_BIT8:
+	case TEST_OUT_BIT9:
+	case TEST_IN_BIT9:
+	case TEST_OUT_BIT10:
+	case TEST_IN_BIT10:
+		return ioam6h->overflow ||
+		       ioam6h->nodelen != 2 ||
+		       ioam6h->remlen;
+
+	case TEST_OUT_BIT12:
+	case TEST_IN_BIT12:
+	case TEST_OUT_BIT13:
+	case TEST_IN_BIT13:
+	case TEST_OUT_BIT14:
+	case TEST_IN_BIT14:
+	case TEST_OUT_BIT15:
+	case TEST_IN_BIT15:
+	case TEST_OUT_BIT16:
+	case TEST_IN_BIT16:
+	case TEST_OUT_BIT17:
+	case TEST_IN_BIT17:
+	case TEST_OUT_BIT18:
+	case TEST_IN_BIT18:
+	case TEST_OUT_BIT19:
+	case TEST_IN_BIT19:
+	case TEST_OUT_BIT20:
+	case TEST_IN_BIT20:
+	case TEST_OUT_BIT21:
+	case TEST_IN_BIT21:
+		return ioam6h->overflow ||
+		       ioam6h->nodelen ||
+		       ioam6h->remlen != 1;
+
+	case TEST_OUT_BIT22:
+	case TEST_IN_BIT22:
+		return ioam6h->overflow ||
+		       ioam6h->nodelen ||
+		       ioam6h->remlen;
+
+	case TEST_OUT_FULL_SUPP_TRACE:
+	case TEST_IN_FULL_SUPP_TRACE:
+	case TEST_FWD_FULL_SUPP_TRACE:
+		return ioam6h->overflow ||
+		       ioam6h->nodelen != 15 ||
+		       ioam6h->remlen;
+
+	default:
+		break;
+	}
+
+	return 1;
+}
+
+static int check_ioam6_data(__u8 **p, struct ioam6_trace_hdr *ioam6h,
+			    const struct ioam_config cnf)
+{
+	unsigned int len;
+	__u8 aligned;
+	__u64 raw64;
+	__u32 raw32;
+
+	if (ioam6h->type.bit0) {
+		raw32 = __be32_to_cpu(*((__u32 *)*p));
+		if (cnf.hlim != (raw32 >> 24) || cnf.id != (raw32 & 0xffffff))
+			return 1;
+		*p += sizeof(__u32);
+	}
+
+	if (ioam6h->type.bit1) {
+		raw32 = __be32_to_cpu(*((__u32 *)*p));
+		if (cnf.ingr_id != (raw32 >> 16) ||
+		    cnf.egr_id != (raw32 & 0xffff))
+			return 1;
+		*p += sizeof(__u32);
+	}
+
+	if (ioam6h->type.bit2)
+		*p += sizeof(__u32);
+
+	if (ioam6h->type.bit3)
+		*p += sizeof(__u32);
+
+	if (ioam6h->type.bit4) {
+		if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+			return 1;
+		*p += sizeof(__u32);
+	}
+
+	if (ioam6h->type.bit5) {
+		if (__be32_to_cpu(*((__u32 *)*p)) != cnf.ns_data)
+			return 1;
+		*p += sizeof(__u32);
+	}
+
+	if (ioam6h->type.bit6) {
+		if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+			return 1;
+		*p += sizeof(__u32);
+	}
+
+	if (ioam6h->type.bit7) {
+		if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+			return 1;
+		*p += sizeof(__u32);
+	}
+
+	if (ioam6h->type.bit8) {
+		raw64 = __be64_to_cpu(*((__u64 *)*p));
+		if (cnf.hlim != (raw64 >> 56) ||
+		    cnf.wide != (raw64 & 0xffffffffffffff))
+			return 1;
+		*p += sizeof(__u64);
+	}
+
+	if (ioam6h->type.bit9) {
+		if (__be32_to_cpu(*((__u32 *)*p)) != cnf.ingr_wide)
+			return 1;
+		*p += sizeof(__u32);
+
+		if (__be32_to_cpu(*((__u32 *)*p)) != cnf.egr_wide)
+			return 1;
+		*p += sizeof(__u32);
+	}
+
+	if (ioam6h->type.bit10) {
+		if (__be64_to_cpu(*((__u64 *)*p)) != cnf.ns_wide)
+			return 1;
+		*p += sizeof(__u64);
+	}
+
+	if (ioam6h->type.bit11) {
+		if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+			return 1;
+		*p += sizeof(__u32);
+	}
+
+	if (ioam6h->type.bit22) {
+		len = cnf.sc_data ? strlen(cnf.sc_data) : 0;
+		aligned = cnf.sc_data ? __ALIGN_KERNEL(len, 4) : 0;
+
+		raw32 = __be32_to_cpu(*((__u32 *)*p));
+		if (aligned != (raw32 >> 24) * 4 ||
+		    cnf.sc_id != (raw32 & 0xffffff))
+			return 1;
+		*p += sizeof(__u32);
+
+		if (cnf.sc_data) {
+			if (strncmp((char *)*p, cnf.sc_data, len))
+				return 1;
+
+			*p += len;
+			aligned -= len;
+
+			while (aligned--) {
+				if (**p != '\0')
+					return 1;
+				*p += sizeof(__u8);
+			}
+		}
+	}
+
+	return 0;
+}
+
+static int check_ioam_header_and_data(int tid, struct ioam6_trace_hdr *ioam6h,
+				      __u32 trace_type, __u16 ioam_ns)
+{
+	__u8 *p;
+
+	if (check_ioam_header(tid, ioam6h, trace_type, ioam_ns))
+		return 1;
+
+	p = ioam6h->data + ioam6h->remlen * 4;
+
+	switch (tid) {
+	case TEST_OUT_BIT0:
+	case TEST_OUT_BIT1:
+	case TEST_OUT_BIT2:
+	case TEST_OUT_BIT3:
+	case TEST_OUT_BIT4:
+	case TEST_OUT_BIT5:
+	case TEST_OUT_BIT6:
+	case TEST_OUT_BIT7:
+	case TEST_OUT_BIT8:
+	case TEST_OUT_BIT9:
+	case TEST_OUT_BIT10:
+	case TEST_OUT_BIT11:
+	case TEST_OUT_BIT22:
+	case TEST_OUT_FULL_SUPP_TRACE:
+		return check_ioam6_data(&p, ioam6h, node1);
+
+	case TEST_IN_BIT0:
+	case TEST_IN_BIT1:
+	case TEST_IN_BIT2:
+	case TEST_IN_BIT3:
+	case TEST_IN_BIT4:
+	case TEST_IN_BIT5:
+	case TEST_IN_BIT6:
+	case TEST_IN_BIT7:
+	case TEST_IN_BIT8:
+	case TEST_IN_BIT9:
+	case TEST_IN_BIT10:
+	case TEST_IN_BIT11:
+	case TEST_IN_BIT22:
+	case TEST_IN_FULL_SUPP_TRACE:
+	{
+		__u32 tmp32 = node2.egr_wide;
+		__u16 tmp16 = node2.egr_id;
+		int res;
+
+		node2.egr_id = 0xffff;
+		node2.egr_wide = 0xffffffff;
+
+		res = check_ioam6_data(&p, ioam6h, node2);
+
+		node2.egr_id = tmp16;
+		node2.egr_wide = tmp32;
+
+		return res;
+	}
+
+	case TEST_FWD_FULL_SUPP_TRACE:
+		if (check_ioam6_data(&p, ioam6h, node3))
+			return 1;
+		if (check_ioam6_data(&p, ioam6h, node2))
+			return 1;
+		return check_ioam6_data(&p, ioam6h, node1);
+
+	default:
+		break;
+	}
+
+	return 1;
+}
+
+static int str2id(const char *tname)
+{
+	if (!strcmp("out_undef_ns", tname))
+		return TEST_OUT_UNDEF_NS;
+	if (!strcmp("out_no_room", tname))
+		return TEST_OUT_NO_ROOM;
+	if (!strcmp("out_bit0", tname))
+		return TEST_OUT_BIT0;
+	if (!strcmp("out_bit1", tname))
+		return TEST_OUT_BIT1;
+	if (!strcmp("out_bit2", tname))
+		return TEST_OUT_BIT2;
+	if (!strcmp("out_bit3", tname))
+		return TEST_OUT_BIT3;
+	if (!strcmp("out_bit4", tname))
+		return TEST_OUT_BIT4;
+	if (!strcmp("out_bit5", tname))
+		return TEST_OUT_BIT5;
+	if (!strcmp("out_bit6", tname))
+		return TEST_OUT_BIT6;
+	if (!strcmp("out_bit7", tname))
+		return TEST_OUT_BIT7;
+	if (!strcmp("out_bit8", tname))
+		return TEST_OUT_BIT8;
+	if (!strcmp("out_bit9", tname))
+		return TEST_OUT_BIT9;
+	if (!strcmp("out_bit10", tname))
+		return TEST_OUT_BIT10;
+	if (!strcmp("out_bit11", tname))
+		return TEST_OUT_BIT11;
+	if (!strcmp("out_bit12", tname))
+		return TEST_OUT_BIT12;
+	if (!strcmp("out_bit13", tname))
+		return TEST_OUT_BIT13;
+	if (!strcmp("out_bit14", tname))
+		return TEST_OUT_BIT14;
+	if (!strcmp("out_bit15", tname))
+		return TEST_OUT_BIT15;
+	if (!strcmp("out_bit16", tname))
+		return TEST_OUT_BIT16;
+	if (!strcmp("out_bit17", tname))
+		return TEST_OUT_BIT17;
+	if (!strcmp("out_bit18", tname))
+		return TEST_OUT_BIT18;
+	if (!strcmp("out_bit19", tname))
+		return TEST_OUT_BIT19;
+	if (!strcmp("out_bit20", tname))
+		return TEST_OUT_BIT20;
+	if (!strcmp("out_bit21", tname))
+		return TEST_OUT_BIT21;
+	if (!strcmp("out_bit22", tname))
+		return TEST_OUT_BIT22;
+	if (!strcmp("out_full_supp_trace", tname))
+		return TEST_OUT_FULL_SUPP_TRACE;
+	if (!strcmp("in_undef_ns", tname))
+		return TEST_IN_UNDEF_NS;
+	if (!strcmp("in_no_room", tname))
+		return TEST_IN_NO_ROOM;
+	if (!strcmp("in_oflag", tname))
+		return TEST_IN_OFLAG;
+	if (!strcmp("in_bit0", tname))
+		return TEST_IN_BIT0;
+	if (!strcmp("in_bit1", tname))
+		return TEST_IN_BIT1;
+	if (!strcmp("in_bit2", tname))
+		return TEST_IN_BIT2;
+	if (!strcmp("in_bit3", tname))
+		return TEST_IN_BIT3;
+	if (!strcmp("in_bit4", tname))
+		return TEST_IN_BIT4;
+	if (!strcmp("in_bit5", tname))
+		return TEST_IN_BIT5;
+	if (!strcmp("in_bit6", tname))
+		return TEST_IN_BIT6;
+	if (!strcmp("in_bit7", tname))
+		return TEST_IN_BIT7;
+	if (!strcmp("in_bit8", tname))
+		return TEST_IN_BIT8;
+	if (!strcmp("in_bit9", tname))
+		return TEST_IN_BIT9;
+	if (!strcmp("in_bit10", tname))
+		return TEST_IN_BIT10;
+	if (!strcmp("in_bit11", tname))
+		return TEST_IN_BIT11;
+	if (!strcmp("in_bit12", tname))
+		return TEST_IN_BIT12;
+	if (!strcmp("in_bit13", tname))
+		return TEST_IN_BIT13;
+	if (!strcmp("in_bit14", tname))
+		return TEST_IN_BIT14;
+	if (!strcmp("in_bit15", tname))
+		return TEST_IN_BIT15;
+	if (!strcmp("in_bit16", tname))
+		return TEST_IN_BIT16;
+	if (!strcmp("in_bit17", tname))
+		return TEST_IN_BIT17;
+	if (!strcmp("in_bit18", tname))
+		return TEST_IN_BIT18;
+	if (!strcmp("in_bit19", tname))
+		return TEST_IN_BIT19;
+	if (!strcmp("in_bit20", tname))
+		return TEST_IN_BIT20;
+	if (!strcmp("in_bit21", tname))
+		return TEST_IN_BIT21;
+	if (!strcmp("in_bit22", tname))
+		return TEST_IN_BIT22;
+	if (!strcmp("in_full_supp_trace", tname))
+		return TEST_IN_FULL_SUPP_TRACE;
+	if (!strcmp("fwd_full_supp_trace", tname))
+		return TEST_FWD_FULL_SUPP_TRACE;
+
+	return -1;
+}
+
+static int ipv6_addr_equal(const struct in6_addr *a1, const struct in6_addr *a2)
+{
+	return ((a1->s6_addr32[0] ^ a2->s6_addr32[0]) |
+		(a1->s6_addr32[1] ^ a2->s6_addr32[1]) |
+		(a1->s6_addr32[2] ^ a2->s6_addr32[2]) |
+		(a1->s6_addr32[3] ^ a2->s6_addr32[3])) == 0;
+}
+
+static int get_u32(__u32 *val, const char *arg, int base)
+{
+	unsigned long res;
+	char *ptr;
+
+	if (!arg || !*arg)
+		return -1;
+	res = strtoul(arg, &ptr, base);
+
+	if (!ptr || ptr == arg || *ptr)
+		return -1;
+
+	if (res == ULONG_MAX && errno == ERANGE)
+		return -1;
+
+	if (res > 0xFFFFFFFFUL)
+		return -1;
+
+	*val = res;
+	return 0;
+}
+
+static int get_u16(__u16 *val, const char *arg, int base)
+{
+	unsigned long res;
+	char *ptr;
+
+	if (!arg || !*arg)
+		return -1;
+	res = strtoul(arg, &ptr, base);
+
+	if (!ptr || ptr == arg || *ptr)
+		return -1;
+
+	if (res == ULONG_MAX && errno == ERANGE)
+		return -1;
+
+	if (res > 0xFFFFUL)
+		return -1;
+
+	*val = res;
+	return 0;
+}
+
+static int (*func[__TEST_MAX])(int, struct ioam6_trace_hdr *, __u32, __u16) = {
+	[TEST_OUT_UNDEF_NS]		= check_ioam_header,
+	[TEST_OUT_NO_ROOM]		= check_ioam_header,
+	[TEST_OUT_BIT0]		= check_ioam_header_and_data,
+	[TEST_OUT_BIT1]		= check_ioam_header_and_data,
+	[TEST_OUT_BIT2]		= check_ioam_header_and_data,
+	[TEST_OUT_BIT3]		= check_ioam_header_and_data,
+	[TEST_OUT_BIT4]		= check_ioam_header_and_data,
+	[TEST_OUT_BIT5]		= check_ioam_header_and_data,
+	[TEST_OUT_BIT6]		= check_ioam_header_and_data,
+	[TEST_OUT_BIT7]		= check_ioam_header_and_data,
+	[TEST_OUT_BIT8]		= check_ioam_header_and_data,
+	[TEST_OUT_BIT9]		= check_ioam_header_and_data,
+	[TEST_OUT_BIT10]		= check_ioam_header_and_data,
+	[TEST_OUT_BIT11]		= check_ioam_header_and_data,
+	[TEST_OUT_BIT12]		= check_ioam_header,
+	[TEST_OUT_BIT13]		= check_ioam_header,
+	[TEST_OUT_BIT14]		= check_ioam_header,
+	[TEST_OUT_BIT15]		= check_ioam_header,
+	[TEST_OUT_BIT16]		= check_ioam_header,
+	[TEST_OUT_BIT17]		= check_ioam_header,
+	[TEST_OUT_BIT18]		= check_ioam_header,
+	[TEST_OUT_BIT19]		= check_ioam_header,
+	[TEST_OUT_BIT20]		= check_ioam_header,
+	[TEST_OUT_BIT21]		= check_ioam_header,
+	[TEST_OUT_BIT22]		= check_ioam_header_and_data,
+	[TEST_OUT_FULL_SUPP_TRACE]	= check_ioam_header_and_data,
+	[TEST_IN_UNDEF_NS]		= check_ioam_header,
+	[TEST_IN_NO_ROOM]		= check_ioam_header,
+	[TEST_IN_OFLAG]		= check_ioam_header,
+	[TEST_IN_BIT0]			= check_ioam_header_and_data,
+	[TEST_IN_BIT1]			= check_ioam_header_and_data,
+	[TEST_IN_BIT2]			= check_ioam_header_and_data,
+	[TEST_IN_BIT3]			= check_ioam_header_and_data,
+	[TEST_IN_BIT4]			= check_ioam_header_and_data,
+	[TEST_IN_BIT5]			= check_ioam_header_and_data,
+	[TEST_IN_BIT6]			= check_ioam_header_and_data,
+	[TEST_IN_BIT7]			= check_ioam_header_and_data,
+	[TEST_IN_BIT8]			= check_ioam_header_and_data,
+	[TEST_IN_BIT9]			= check_ioam_header_and_data,
+	[TEST_IN_BIT10]		= check_ioam_header_and_data,
+	[TEST_IN_BIT11]		= check_ioam_header_and_data,
+	[TEST_IN_BIT12]		= check_ioam_header,
+	[TEST_IN_BIT13]		= check_ioam_header,
+	[TEST_IN_BIT14]		= check_ioam_header,
+	[TEST_IN_BIT15]		= check_ioam_header,
+	[TEST_IN_BIT16]		= check_ioam_header,
+	[TEST_IN_BIT17]		= check_ioam_header,
+	[TEST_IN_BIT18]		= check_ioam_header,
+	[TEST_IN_BIT19]		= check_ioam_header,
+	[TEST_IN_BIT20]		= check_ioam_header,
+	[TEST_IN_BIT21]		= check_ioam_header,
+	[TEST_IN_BIT22]		= check_ioam_header_and_data,
+	[TEST_IN_FULL_SUPP_TRACE]	= check_ioam_header_and_data,
+	[TEST_FWD_FULL_SUPP_TRACE]	= check_ioam_header_and_data,
+};
+
+int main(int argc, char **argv)
+{
+	int fd, size, hoplen, tid, ret = 1;
+	struct in6_addr src, dst;
+	struct ioam6_hdr *opt;
+	struct ipv6hdr *ip6h;
+	__u8 buffer[400], *p;
+	__u16 ioam_ns;
+	__u32 tr_type;
+
+	if (argc != 7)
+		goto out;
+
+	tid = str2id(argv[2]);
+	if (tid < 0 || !func[tid])
+		goto out;
+
+	if (inet_pton(AF_INET6, argv[3], &src) != 1 ||
+	    inet_pton(AF_INET6, argv[4], &dst) != 1)
+		goto out;
+
+	if (get_u32(&tr_type, argv[5], 16) ||
+	    get_u16(&ioam_ns, argv[6], 0))
+		goto out;
+
+	fd = socket(AF_PACKET, SOCK_DGRAM, __cpu_to_be16(ETH_P_IPV6));
+	if (!fd)
+		goto out;
+
+	if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
+		       argv[1], strlen(argv[1])))
+		goto close;
+
+recv:
+	size = recv(fd, buffer, sizeof(buffer), 0);
+	if (size <= 0)
+		goto close;
+
+	ip6h = (struct ipv6hdr *)buffer;
+
+	if (!ipv6_addr_equal(&ip6h->saddr, &src) ||
+	    !ipv6_addr_equal(&ip6h->daddr, &dst))
+		goto recv;
+
+	if (ip6h->nexthdr != IPPROTO_HOPOPTS)
+		goto close;
+
+	p = buffer + sizeof(*ip6h);
+	hoplen = (p[1] + 1) << 3;
+	p += sizeof(struct ipv6_hopopt_hdr);
+
+	while (hoplen > 0) {
+		opt = (struct ioam6_hdr *)p;
+
+		if (opt->opt_type == IPV6_TLV_IOAM &&
+		    opt->type == IOAM6_TYPE_PREALLOC) {
+			p += sizeof(*opt);
+			ret = func[tid](tid, (struct ioam6_trace_hdr *)p,
+					   tr_type, ioam_ns);
+			break;
+		}
+
+		p += opt->opt_len + 2;
+		hoplen -= opt->opt_len + 2;
+	}
+close:
+	close(fd);
+out:
+	return ret;
+}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index f02f4de2f3a0..255793c5ac4f 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -3,8 +3,10 @@
 
 ret=0
 sin=""
+sinfail=""
 sout=""
 cin=""
+cinfail=""
 cinsent=""
 cout=""
 ksft_skip=4
@@ -76,6 +78,14 @@ init()
 	done
 }
 
+init_shapers()
+{
+	for i in `seq 1 4`; do
+		tc -n $ns1 qdisc add dev ns1eth$i root netem rate 20mbit delay 1
+		tc -n $ns2 qdisc add dev ns2eth$i root netem rate 20mbit delay 1
+	done
+}
+
 cleanup_partial()
 {
 	rm -f "$capout"
@@ -88,8 +98,8 @@ cleanup_partial()
 
 cleanup()
 {
-	rm -f "$cin" "$cout"
-	rm -f "$sin" "$sout" "$cinsent"
+	rm -f "$cin" "$cout" "$sinfail"
+	rm -f "$sin" "$sout" "$cinsent" "$cinfail"
 	cleanup_partial
 }
 
@@ -211,11 +221,15 @@ link_failure()
 {
 	ns="$1"
 
-	l=$((RANDOM%4))
-	l=$((l+1))
+	if [ -z "$FAILING_LINKS" ]; then
+		l=$((RANDOM%4))
+		FAILING_LINKS=$((l+1))
+	fi
 
-	veth="ns1eth$l"
-	ip -net "$ns" link set "$veth" down
+	for l in $FAILING_LINKS; do
+		veth="ns1eth$l"
+		ip -net "$ns" link set "$veth" down
+	done
 }
 
 # $1: IP address
@@ -280,10 +294,17 @@ do_transfer()
 		local_addr="0.0.0.0"
 	fi
 
-	timeout ${timeout_test} \
-		ip netns exec ${listener_ns} \
-			$mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
-				${local_addr} < "$sin" > "$sout" &
+	if [ "$test_link_fail" -eq 2 ];then
+		timeout ${timeout_test} \
+			ip netns exec ${listener_ns} \
+				$mptcp_connect -t ${timeout_poll} -l -p $port -s ${cl_proto} \
+					${local_addr} < "$sinfail" > "$sout" &
+	else
+		timeout ${timeout_test} \
+			ip netns exec ${listener_ns} \
+				$mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
+					${local_addr} < "$sin" > "$sout" &
+	fi
 	spid=$!
 
 	sleep 1
@@ -294,7 +315,7 @@ do_transfer()
 				$mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
 					$connect_addr < "$cin" > "$cout" &
 	else
-		( cat "$cin" ; sleep 2; link_failure $listener_ns ; cat "$cin" ) | \
+		( cat "$cinfail" ; sleep 2; link_failure $listener_ns ; cat "$cinfail" ) | \
 			tee "$cinsent" | \
 			timeout ${timeout_test} \
 				ip netns exec ${connector_ns} \
@@ -323,17 +344,18 @@ do_transfer()
 		let rm_nr_ns1=-addr_nr_ns1
 		if [ $rm_nr_ns1 -lt 8 ]; then
 			counter=1
+			pos=1
 			dump=(`ip netns exec ${listener_ns} ./pm_nl_ctl dump`)
 			if [ ${#dump[@]} -gt 0 ]; then
-				id=${dump[1]}
 				sleep 1
 
 				while [ $counter -le $rm_nr_ns1 ]
 				do
+					id=${dump[$pos]}
 					ip netns exec ${listener_ns} ./pm_nl_ctl del $id
 					sleep 1
 					let counter+=1
-					let id+=1
+					let pos+=5
 				done
 			fi
 		elif [ $rm_nr_ns1 -eq 8 ]; then
@@ -345,6 +367,12 @@ do_transfer()
 		fi
 	fi
 
+	flags="subflow"
+	if [[ "${addr_nr_ns2}" = "fullmesh_"* ]]; then
+		flags="${flags},fullmesh"
+		addr_nr_ns2=${addr_nr_ns2:9}
+	fi
+
 	if [ $addr_nr_ns2 -gt 0 ]; then
 		let add_nr_ns2=addr_nr_ns2
 		counter=3
@@ -356,7 +384,7 @@ do_transfer()
 			else
 				addr="10.0.$counter.2"
 			fi
-			ip netns exec $ns2 ./pm_nl_ctl add $addr flags subflow
+			ip netns exec $ns2 ./pm_nl_ctl add $addr flags $flags
 			let counter+=1
 			let add_nr_ns2-=1
 		done
@@ -365,17 +393,18 @@ do_transfer()
 		let rm_nr_ns2=-addr_nr_ns2
 		if [ $rm_nr_ns2 -lt 8 ]; then
 			counter=1
+			pos=1
 			dump=(`ip netns exec ${connector_ns} ./pm_nl_ctl dump`)
 			if [ ${#dump[@]} -gt 0 ]; then
-				id=${dump[1]}
 				sleep 1
 
 				while [ $counter -le $rm_nr_ns2 ]
 				do
+					id=${dump[$pos]}
 					ip netns exec ${connector_ns} ./pm_nl_ctl del $id
 					sleep 1
 					let counter+=1
-					let id+=1
+					let pos+=5
 				done
 			fi
 		elif [ $rm_nr_ns2 -eq 8 ]; then
@@ -434,7 +463,11 @@ do_transfer()
 		return 1
 	fi
 
-	check_transfer $sin $cout "file received by client"
+	if [ "$test_link_fail" -eq 2 ];then
+		check_transfer $sinfail $cout "file received by client"
+	else
+		check_transfer $sin $cout "file received by client"
+	fi
 	retc=$?
 	if [ "$test_link_fail" -eq 0 ];then
 		check_transfer $cin $sout "file received by server"
@@ -477,29 +510,33 @@ run_tests()
 	lret=0
 	oldin=""
 
-	if [ "$test_linkfail" -eq 1 ];then
-		size=$((RANDOM%1024))
+	# create the input file for the failure test when
+	# the first failure test run
+	if [ "$test_linkfail" -ne 0 -a -z "$cinfail" ]; then
+		# the client file must be considerably larger
+		# of the maximum expected cwin value, or the
+		# link utilization will be not predicable
+		size=$((RANDOM%2))
 		size=$((size+1))
-		size=$((size*128))
+		size=$((size*8192))
+		size=$((size + ( $RANDOM % 8192) ))
 
-		oldin=$(mktemp)
-		cp "$cin" "$oldin"
-		make_file "$cin" "client" $size
+		cinfail=$(mktemp)
+		make_file "$cinfail" "client" $size
 	fi
 
-	do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} \
-		${test_linkfail} ${addr_nr_ns1} ${addr_nr_ns2} ${speed} ${bkup}
-	lret=$?
+	if [ "$test_linkfail" -eq 2 -a -z "$sinfail" ]; then
+		size=$((RANDOM%16))
+		size=$((size+1))
+		size=$((size*2048))
 
-	if [ "$test_linkfail" -eq 1 ];then
-		cp "$oldin" "$cin"
-		rm -f "$oldin"
+		sinfail=$(mktemp)
+		make_file "$sinfail" "server" $size
 	fi
 
-	if [ $lret -ne 0 ]; then
-		ret=$lret
-		return
-	fi
+	do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} \
+		${test_linkfail} ${addr_nr_ns1} ${addr_nr_ns2} ${speed} ${bkup}
+	lret=$?
 }
 
 chk_csum_nr()
@@ -541,6 +578,43 @@ chk_csum_nr()
 	fi
 }
 
+chk_fail_nr()
+{
+	local mp_fail_nr_tx=$1
+	local mp_fail_nr_rx=$2
+	local count
+	local dump_stats
+
+	printf "%-39s %s" " " "ftx"
+	count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPFailTx | awk '{print $2}'`
+	[ -z "$count" ] && count=0
+	if [ "$count" != "$mp_fail_nr_tx" ]; then
+		echo "[fail] got $count MP_FAIL[s] TX expected $mp_fail_nr_tx"
+		ret=1
+		dump_stats=1
+	else
+		echo -n "[ ok ]"
+	fi
+
+	echo -n " - frx   "
+	count=`ip netns exec $ns2 nstat -as | grep MPTcpExtMPFailRx | awk '{print $2}'`
+	[ -z "$count" ] && count=0
+	if [ "$count" != "$mp_fail_nr_rx" ]; then
+		echo "[fail] got $count MP_FAIL[s] RX expected $mp_fail_nr_rx"
+		ret=1
+		dump_stats=1
+	else
+		echo "[ ok ]"
+	fi
+
+	if [ "${dump_stats}" = 1 ]; then
+		echo Server ns stats
+		ip netns exec $ns1 nstat -as | grep MPTcp
+		echo Client ns stats
+		ip netns exec $ns2 nstat -as | grep MPTcp
+	fi
+}
+
 chk_join_nr()
 {
 	local msg="$1"
@@ -590,6 +664,47 @@ chk_join_nr()
 	fi
 	if [ $checksum -eq 1 ]; then
 		chk_csum_nr
+		chk_fail_nr 0 0
+	fi
+}
+
+# a negative value for 'stale_max' means no upper bound:
+# for bidirectional transfer, if one peer sleep for a while
+# - as these tests do - we can have a quite high number of
+# stale/recover conversions, proportional to
+# sleep duration/ MPTCP-level RTX interval.
+chk_stale_nr()
+{
+	local ns=$1
+	local stale_min=$2
+	local stale_max=$3
+	local stale_delta=$4
+	local dump_stats
+	local stale_nr
+	local recover_nr
+
+	printf "%-39s %-18s" " " "stale"
+	stale_nr=`ip netns exec $ns nstat -as | grep MPTcpExtSubflowStale | awk '{print $2}'`
+	[ -z "$stale_nr" ] && stale_nr=0
+	recover_nr=`ip netns exec $ns nstat -as | grep MPTcpExtSubflowRecover | awk '{print $2}'`
+	[ -z "$recover_nr" ] && recover_nr=0
+
+	if [ $stale_nr -lt $stale_min ] ||
+	   [ $stale_max -gt 0 -a $stale_nr -gt $stale_max ] ||
+	   [ $((stale_nr - $recover_nr)) -ne $stale_delta ]; then
+		echo "[fail] got $stale_nr stale[s] $recover_nr recover[s], " \
+		     " expected stale in range [$stale_min..$stale_max]," \
+		     " stale-recover delta $stale_delta "
+		ret=1
+		dump_stats=1
+	else
+		echo "[ ok ]"
+	fi
+
+	if [ "${dump_stats}" = 1 ]; then
+		echo $ns stats
+		ip netns exec $ns ip -s link show
+		ip netns exec $ns nstat -as | grep MPTcp
 	fi
 }
 
@@ -801,6 +916,27 @@ chk_prio_nr()
 	fi
 }
 
+chk_link_usage()
+{
+	local ns=$1
+	local link=$2
+	local out=$3
+	local expected_rate=$4
+	local tx_link=`ip netns exec $ns cat /sys/class/net/$link/statistics/tx_bytes`
+	local tx_total=`ls -l $out | awk '{print $5}'`
+	local tx_rate=$((tx_link * 100 / $tx_total))
+	local tolerance=5
+
+	printf "%-39s %-18s" " " "link usage"
+	if [ $tx_rate -lt $((expected_rate - $tolerance)) -o \
+	     $tx_rate -gt $((expected_rate + $tolerance)) ]; then
+		echo "[fail] got $tx_rate% usage, expected $expected_rate%"
+		ret=1
+	else
+		echo "[ ok ]"
+	fi
+}
+
 subflows_tests()
 {
 	reset
@@ -918,20 +1054,101 @@ signal_address_tests()
 	run_tests $ns1 $ns2 10.0.1.1
 	chk_join_nr "signal invalid addresses" 1 1 1
 	chk_add_nr 3 3
+
+	# signal addresses race test
+	reset
+	ip netns exec $ns1 ./pm_nl_ctl limits 4 4
+	ip netns exec $ns2 ./pm_nl_ctl limits 4 4
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1 flags signal
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.4.1 flags signal
+	ip netns exec $ns2 ./pm_nl_ctl add 10.0.1.2 flags signal
+	ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags signal
+	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags signal
+	ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags signal
+	run_tests $ns1 $ns2 10.0.1.1
+	chk_add_nr 4 4
 }
 
 link_failure_tests()
 {
 	# accept and use add_addr with additional subflows and link loss
 	reset
+
+	# without any b/w limit each veth could spool the packets and get
+	# them acked at xmit time, so that the corresponding subflow will
+	# have almost always no outstanding pkts, the scheduler will pick
+	# always the first subflow and we will have hard time testing
+	# active backup and link switch-over.
+	# Let's set some arbitrary (low) virtual link limits.
+	init_shapers
 	ip netns exec $ns1 ./pm_nl_ctl limits 0 3
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal
 	ip netns exec $ns2 ./pm_nl_ctl limits 1 3
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-	ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
+	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow
+	ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 dev ns2eth4 flags subflow
 	run_tests $ns1 $ns2 10.0.1.1 1
 	chk_join_nr "multiple flows, signal, link failure" 3 3 3
 	chk_add_nr 1 1
+	chk_stale_nr $ns2 1 5 1
+
+	# accept and use add_addr with additional subflows and link loss
+	# for bidirectional transfer
+	reset
+	init_shapers
+	ip netns exec $ns1 ./pm_nl_ctl limits 0 3
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal
+	ip netns exec $ns2 ./pm_nl_ctl limits 1 3
+	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow
+	ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 dev ns2eth4 flags subflow
+	run_tests $ns1 $ns2 10.0.1.1 2
+	chk_join_nr "multi flows, signal, bidi, link fail" 3 3 3
+	chk_add_nr 1 1
+	chk_stale_nr $ns2 1 -1 1
+
+	# 2 subflows plus 1 backup subflow with a lossy link, backup
+	# will never be used
+	reset
+	init_shapers
+	ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal
+	ip netns exec $ns2 ./pm_nl_ctl limits 1 2
+	export FAILING_LINKS="1"
+	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow,backup
+	run_tests $ns1 $ns2 10.0.1.1 1
+	chk_join_nr "backup subflow unused, link failure" 2 2 2
+	chk_add_nr 1 1
+	chk_link_usage $ns2 ns2eth3 $cinsent 0
+
+	# 2 lossy links after half transfer, backup will get half of
+	# the traffic
+	reset
+	init_shapers
+	ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal
+	ip netns exec $ns2 ./pm_nl_ctl limits 1 2
+	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow,backup
+	export FAILING_LINKS="1 2"
+	run_tests $ns1 $ns2 10.0.1.1 1
+	chk_join_nr "backup flow used, multi links fail" 2 2 2
+	chk_add_nr 1 1
+	chk_stale_nr $ns2 2 4 2
+	chk_link_usage $ns2 ns2eth3 $cinsent 50
+
+	# use a backup subflow with the first subflow on a lossy link
+	# for bidirectional transfer
+	reset
+	init_shapers
+	ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal
+	ip netns exec $ns2 ./pm_nl_ctl limits 1 3
+	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow,backup
+	run_tests $ns1 $ns2 10.0.1.1 2
+	chk_join_nr "backup flow used, bidi, link failure" 2 2 2
+	chk_add_nr 1 1
+	chk_stale_nr $ns2 1 -1 2
+	chk_link_usage $ns2 ns2eth3 $cinsent 50
 }
 
 add_addr_timeout_tests()
@@ -1530,6 +1747,55 @@ deny_join_id0_tests()
 	chk_join_nr "subflow and address allow join id0 2" 1 1 1
 }
 
+fullmesh_tests()
+{
+	# fullmesh 1
+	# 2 fullmesh addrs in ns2, added before the connection,
+	# 1 non-fullmesh addr in ns1, added during the connection.
+	reset
+	ip netns exec $ns1 ./pm_nl_ctl limits 0 4
+	ip netns exec $ns2 ./pm_nl_ctl limits 1 4
+	ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow,fullmesh
+	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow,fullmesh
+	run_tests $ns1 $ns2 10.0.1.1 0 1 0 slow
+	chk_join_nr "fullmesh test 2x1" 4 4 4
+	chk_add_nr 1 1
+
+	# fullmesh 2
+	# 1 non-fullmesh addr in ns1, added before the connection,
+	# 1 fullmesh addr in ns2, added during the connection.
+	reset
+	ip netns exec $ns1 ./pm_nl_ctl limits 1 3
+	ip netns exec $ns2 ./pm_nl_ctl limits 1 3
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+	run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_1 slow
+	chk_join_nr "fullmesh test 1x1" 3 3 3
+	chk_add_nr 1 1
+
+	# fullmesh 3
+	# 1 non-fullmesh addr in ns1, added before the connection,
+	# 2 fullmesh addrs in ns2, added during the connection.
+	reset
+	ip netns exec $ns1 ./pm_nl_ctl limits 2 5
+	ip netns exec $ns2 ./pm_nl_ctl limits 1 5
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+	run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_2 slow
+	chk_join_nr "fullmesh test 1x2" 5 5 5
+	chk_add_nr 1 1
+
+	# fullmesh 4
+	# 1 non-fullmesh addr in ns1, added before the connection,
+	# 2 fullmesh addrs in ns2, added during the connection,
+	# limit max_subflows to 4.
+	reset
+	ip netns exec $ns1 ./pm_nl_ctl limits 2 4
+	ip netns exec $ns2 ./pm_nl_ctl limits 1 4
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+	run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_2 slow
+	chk_join_nr "fullmesh test 1x2, limited" 4 4 4
+	chk_add_nr 1 1
+}
+
 all_tests()
 {
 	subflows_tests
@@ -1545,6 +1811,7 @@ all_tests()
 	syncookies_tests
 	checksum_tests
 	deny_join_id0_tests
+	fullmesh_tests
 }
 
 usage()
@@ -1563,6 +1830,7 @@ usage()
 	echo "  -k syncookies_tests"
 	echo "  -S checksum_tests"
 	echo "  -d deny_join_id0_tests"
+	echo "  -m fullmesh_tests"
 	echo "  -c capture pcap files"
 	echo "  -C enable data checksum"
 	echo "  -h help"
@@ -1598,7 +1866,7 @@ if [ $do_all_tests -eq 1 ]; then
 	exit $ret
 fi
 
-while getopts 'fsltra64bpkdchCS' opt; do
+while getopts 'fsltra64bpkdmchCS' opt; do
 	case $opt in
 		f)
 			subflows_tests
@@ -1639,6 +1907,9 @@ while getopts 'fsltra64bpkdchCS' opt; do
 		d)
 			deny_join_id0_tests
 			;;
+		m)
+			fullmesh_tests
+			;;
 		c)
 			;;
 		C)
diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
index 115decfdc1ef..354784512748 100644
--- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
+++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
@@ -25,7 +25,7 @@
 static void syntax(char *argv[])
 {
 	fprintf(stderr, "%s add|get|set|del|flush|dump|accept [<args>]\n", argv[0]);
-	fprintf(stderr, "\tadd [flags signal|subflow|backup] [id <nr>] [dev <name>] <ip>\n");
+	fprintf(stderr, "\tadd [flags signal|subflow|backup|fullmesh] [id <nr>] [dev <name>] <ip>\n");
 	fprintf(stderr, "\tdel <id> [<ip>]\n");
 	fprintf(stderr, "\tget <id>\n");
 	fprintf(stderr, "\tset <ip> [flags backup|nobackup]\n");
@@ -236,11 +236,18 @@ int add_addr(int fd, int pm_family, int argc, char *argv[])
 					flags |= MPTCP_PM_ADDR_FLAG_SIGNAL;
 				else if (!strcmp(tok, "backup"))
 					flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
+				else if (!strcmp(tok, "fullmesh"))
+					flags |= MPTCP_PM_ADDR_FLAG_FULLMESH;
 				else
 					error(1, errno,
 					      "unknown flag %s", argv[arg]);
 			}
 
+			if (flags & MPTCP_PM_ADDR_FLAG_SIGNAL &&
+			    flags & MPTCP_PM_ADDR_FLAG_FULLMESH) {
+				error(1, errno, "error flag fullmesh");
+			}
+
 			rta = (void *)(data + off);
 			rta->rta_type = MPTCP_PM_ADDR_ATTR_FLAGS;
 			rta->rta_len = RTA_LENGTH(4);
@@ -422,6 +429,13 @@ static void print_addr(struct rtattr *attrs, int len)
 					printf(",");
 			}
 
+			if (flags & MPTCP_PM_ADDR_FLAG_FULLMESH) {
+				printf("fullmesh");
+				flags &= ~MPTCP_PM_ADDR_FLAG_FULLMESH;
+				if (flags)
+					printf(",");
+			}
+
 			/* bump unknown flags, if any */
 			if (flags)
 				printf("0x%x", flags);
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
index fd63ebfe9a2b..910d8126af8f 100755
--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -22,8 +22,8 @@ usage() {
 
 cleanup()
 {
-	rm -f "$cin" "$cout"
-	rm -f "$sin" "$sout"
+	rm -f "$cout" "$sout"
+	rm -f "$large" "$small"
 	rm -f "$capout"
 
 	local netns
diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c
index db4521335722..3653d6468c67 100644
--- a/tools/testing/selftests/net/psock_fanout.c
+++ b/tools/testing/selftests/net/psock_fanout.c
@@ -111,8 +111,8 @@ static int sock_fanout_open(uint16_t typeflags, uint16_t group_id)
 static void sock_fanout_set_cbpf(int fd)
 {
 	struct sock_filter bpf_filter[] = {
-		BPF_STMT(BPF_LD+BPF_B+BPF_ABS, 80),	      /* ldb [80] */
-		BPF_STMT(BPF_RET+BPF_A, 0),		      /* ret A */
+		BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 80),	      /* ldb [80] */
+		BPF_STMT(BPF_RET | BPF_A, 0),		      /* ret A */
 	};
 	struct sock_fprog bpf_prog;
 
diff --git a/tools/testing/selftests/net/psock_snd.sh b/tools/testing/selftests/net/psock_snd.sh
index 170be65e0816..1cbfeb5052ec 100755
--- a/tools/testing/selftests/net/psock_snd.sh
+++ b/tools/testing/selftests/net/psock_snd.sh
@@ -86,9 +86,6 @@ echo "raw truncate hlen - 1 (expected to fail: EINVAL)"
 echo "raw gso min size"
 ./in_netns.sh ./psock_snd -v -c -g -l "${mss_exceeds}"
 
-echo "raw gso min size - 1 (expected to fail)"
-(! ./in_netns.sh ./psock_snd -v -c -g -l "${mss}")
-
 echo "raw gso max size"
 ./in_netns.sh ./psock_snd -v -c -g -l "${max_mss}"
 
diff --git a/tools/testing/selftests/net/run_afpackettests b/tools/testing/selftests/net/run_afpackettests
index 8b42e8b04e0f..a59cb6a3c4f5 100755
--- a/tools/testing/selftests/net/run_afpackettests
+++ b/tools/testing/selftests/net/run_afpackettests
@@ -1,9 +1,12 @@
 #!/bin/sh
 # SPDX-License-Identifier: GPL-2.0
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 if [ $(id -u) != 0 ]; then
 	echo $msg must be run as root >&2
-	exit 0
+	exit $ksft_skip
 fi
 
 ret=0
diff --git a/tools/testing/selftests/net/setup_loopback.sh b/tools/testing/selftests/net/setup_loopback.sh
new file mode 100755
index 000000000000..e57bbfbc5208
--- /dev/null
+++ b/tools/testing/selftests/net/setup_loopback.sh
@@ -0,0 +1,118 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+readonly FLUSH_PATH="/sys/class/net/${dev}/gro_flush_timeout"
+readonly IRQ_PATH="/sys/class/net/${dev}/napi_defer_hard_irqs"
+readonly FLUSH_TIMEOUT="$(< ${FLUSH_PATH})"
+readonly HARD_IRQS="$(< ${IRQ_PATH})"
+
+netdev_check_for_carrier() {
+	local -r dev="$1"
+
+	for i in {1..5}; do
+		carrier="$(cat /sys/class/net/${dev}/carrier)"
+		if [[ "${carrier}" -ne 1 ]] ; then
+			echo "carrier not ready yet..." >&2
+			sleep 1
+		else
+			echo "carrier ready" >&2
+			break
+		fi
+	done
+	echo "${carrier}"
+}
+
+# Assumes that there is no existing ipvlan device on the physical device
+setup_loopback_environment() {
+	local dev="$1"
+
+	# Fail hard if cannot turn on loopback mode for current NIC
+	ethtool -K "${dev}" loopback on || exit 1
+	sleep 1
+
+	# Check for the carrier
+	carrier=$(netdev_check_for_carrier ${dev})
+	if [[ "${carrier}" -ne 1 ]] ; then
+		echo "setup_loopback_environment failed"
+		exit 1
+	fi
+}
+
+setup_macvlan_ns(){
+	local -r link_dev="$1"
+	local -r ns_name="$2"
+	local -r ns_dev="$3"
+	local -r ns_mac="$4"
+	local -r addr="$5"
+
+	ip link add link "${link_dev}" dev "${ns_dev}" \
+		address "${ns_mac}" type macvlan
+	exit_code=$?
+	if [[ "${exit_code}" -ne 0 ]]; then
+		echo "setup_macvlan_ns failed"
+		exit $exit_code
+	fi
+
+	[[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}"
+	ip link set dev "${ns_dev}" netns "${ns_name}"
+	ip -netns "${ns_name}" link set dev "${ns_dev}" up
+	if [[ -n "${addr}" ]]; then
+		ip -netns "${ns_name}" addr add dev "${ns_dev}" "${addr}"
+	fi
+
+	sleep 1
+}
+
+cleanup_macvlan_ns(){
+	while (( $# >= 2 )); do
+		ns_name="$1"
+		ns_dev="$2"
+		ip -netns "${ns_name}" link del dev "${ns_dev}"
+		ip netns del "${ns_name}"
+		shift 2
+	done
+}
+
+cleanup_loopback(){
+	local -r dev="$1"
+
+	ethtool -K "${dev}" loopback off
+	sleep 1
+
+	# Check for the carrier
+	carrier=$(netdev_check_for_carrier ${dev})
+	if [[ "${carrier}" -ne 1 ]] ; then
+		echo "setup_loopback_environment failed"
+		exit 1
+	fi
+}
+
+setup_interrupt() {
+	# Use timer on  host to trigger the network stack
+	# Also disable device interrupt to not depend on NIC interrupt
+	# Reduce test flakiness caused by unexpected interrupts
+	echo 100000 >"${FLUSH_PATH}"
+	echo 50 >"${IRQ_PATH}"
+}
+
+setup_ns() {
+	# Set up server_ns namespace and client_ns namespace
+	setup_macvlan_ns "${dev}" server_ns server "${SERVER_MAC}"
+	setup_macvlan_ns "${dev}" client_ns client "${CLIENT_MAC}"
+}
+
+cleanup_ns() {
+	cleanup_macvlan_ns server_ns server client_ns client
+}
+
+setup() {
+	setup_loopback_environment "${dev}"
+	setup_interrupt
+}
+
+cleanup() {
+	cleanup_loopback "${dev}"
+
+	echo "${FLUSH_TIMEOUT}" >"${FLUSH_PATH}"
+	echo "${HARD_IRQS}" >"${IRQ_PATH}"
+}
diff --git a/tools/testing/selftests/net/setup_veth.sh b/tools/testing/selftests/net/setup_veth.sh
new file mode 100644
index 000000000000..1003ddf7b3b2
--- /dev/null
+++ b/tools/testing/selftests/net/setup_veth.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+setup_veth_ns() {
+	local -r link_dev="$1"
+	local -r ns_name="$2"
+	local -r ns_dev="$3"
+	local -r ns_mac="$4"
+
+	[[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}"
+	echo 100000 > "/sys/class/net/${ns_dev}/gro_flush_timeout"
+	ip link set dev "${ns_dev}" netns "${ns_name}" mtu 65535
+	ip -netns "${ns_name}" link set dev "${ns_dev}" up
+
+	ip netns exec "${ns_name}" ethtool -K "${ns_dev}" gro on tso off
+}
+
+setup_ns() {
+	# Set up server_ns namespace and client_ns namespace
+	ip link add name server type veth peer name client
+
+	setup_veth_ns "${dev}" server_ns server "${SERVER_MAC}"
+	setup_veth_ns "${dev}" client_ns client "${CLIENT_MAC}"
+}
+
+cleanup_ns() {
+	local ns_name
+
+	for ns_name in client_ns server_ns; do
+		[[ -e /var/run/netns/"${ns_name}" ]] && ip netns del "${ns_name}"
+	done
+}
+
+setup() {
+	# no global init setup step needed
+	:
+}
+
+cleanup() {
+	cleanup_ns
+}
diff --git a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
index 75ada17ac061..aebaab8ce44c 100755
--- a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
@@ -193,6 +193,9 @@
 # +---------------------------------------------------+
 #
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 readonly LOCALSID_TABLE_ID=90
 readonly IPv6_RT_NETWORK=fd00
 readonly IPv6_HS_NETWORK=cafe
@@ -543,18 +546,18 @@ host_vpn_isolation_tests()
 
 if [ "$(id -u)" -ne 0 ];then
 	echo "SKIP: Need root privileges"
-	exit 0
+	exit $ksft_skip
 fi
 
 if [ ! -x "$(command -v ip)" ]; then
 	echo "SKIP: Could not run test without ip tool"
-	exit 0
+	exit $ksft_skip
 fi
 
 modprobe vrf &>/dev/null
 if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
         echo "SKIP: vrf sysctl does not exist"
-        exit 0
+        exit $ksft_skip
 fi
 
 cleanup &>/dev/null
diff --git a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
index ad7a9fc59934..1003119773e5 100755
--- a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
@@ -163,6 +163,9 @@
 # +---------------------------------------------------+
 #
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 readonly LOCALSID_TABLE_ID=90
 readonly IPv6_RT_NETWORK=fd00
 readonly IPv4_HS_NETWORK=10.0.0
@@ -464,18 +467,18 @@ host_vpn_isolation_tests()
 
 if [ "$(id -u)" -ne 0 ];then
 	echo "SKIP: Need root privileges"
-	exit 0
+	exit $ksft_skip
 fi
 
 if [ ! -x "$(command -v ip)" ]; then
 	echo "SKIP: Could not run test without ip tool"
-	exit 0
+	exit $ksft_skip
 fi
 
 modprobe vrf &>/dev/null
 if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
         echo "SKIP: vrf sysctl does not exist"
-        exit 0
+        exit $ksft_skip
 fi
 
 cleanup &>/dev/null
diff --git a/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh
index 68708f5e26a0..b9b06ef80d88 100755
--- a/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh
@@ -164,6 +164,9 @@
 # +---------------------------------------------------+
 #
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 readonly LOCALSID_TABLE_ID=90
 readonly IPv6_RT_NETWORK=fd00
 readonly IPv6_HS_NETWORK=cafe
@@ -472,18 +475,18 @@ host_vpn_isolation_tests()
 
 if [ "$(id -u)" -ne 0 ];then
 	echo "SKIP: Need root privileges"
-	exit 0
+	exit $ksft_skip
 fi
 
 if [ ! -x "$(command -v ip)" ]; then
 	echo "SKIP: Could not run test without ip tool"
-	exit 0
+	exit $ksft_skip
 fi
 
 modprobe vrf &>/dev/null
 if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
         echo "SKIP: vrf sysctl does not exist"
-        exit 0
+        exit $ksft_skip
 fi
 
 cleanup &>/dev/null
diff --git a/tools/testing/selftests/net/toeplitz.c b/tools/testing/selftests/net/toeplitz.c
new file mode 100644
index 000000000000..710ac956bdb3
--- /dev/null
+++ b/tools/testing/selftests/net/toeplitz.c
@@ -0,0 +1,585 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Toeplitz test
+ *
+ * 1. Read packets and their rx_hash using PF_PACKET/TPACKET_V3
+ * 2. Compute the rx_hash in software based on the packet contents
+ * 3. Compare the two
+ *
+ * Optionally, either '-C $rx_irq_cpu_list' or '-r $rps_bitmap' may be given.
+ *
+ * If '-C $rx_irq_cpu_list' is given, also
+ *
+ * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU
+ * 5. Compute the rxqueue that RSS would select based on this rx_hash
+ * 6. Using the $rx_irq_cpu_list map, identify the arriving cpu based on rxq irq
+ * 7. Compare the cpus from 4 and 6
+ *
+ * Else if '-r $rps_bitmap' is given, also
+ *
+ * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU
+ * 5. Compute the cpu that RPS should select based on rx_hash and $rps_bitmap
+ * 6. Compare the cpus from 4 and 5
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <linux/filter.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <net/if.h>
+#include <netdb.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/sysinfo.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#define TOEPLITZ_KEY_MIN_LEN	40
+#define TOEPLITZ_KEY_MAX_LEN	60
+
+#define TOEPLITZ_STR_LEN(K)	(((K) * 3) - 1)	/* hex encoded: AA:BB:CC:...:ZZ */
+#define TOEPLITZ_STR_MIN_LEN	TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MIN_LEN)
+#define TOEPLITZ_STR_MAX_LEN	TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MAX_LEN)
+
+#define FOUR_TUPLE_MAX_LEN	((sizeof(struct in6_addr) * 2) + (sizeof(uint16_t) * 2))
+
+#define RSS_MAX_CPUS (1 << 16)	/* real constraint is PACKET_FANOUT_MAX */
+
+#define RPS_MAX_CPUS 16UL	/* must be a power of 2 */
+
+/* configuration options (cmdline arguments) */
+static uint16_t cfg_dport =	8000;
+static int cfg_family =		AF_INET6;
+static char *cfg_ifname =	"eth0";
+static int cfg_num_queues;
+static int cfg_num_rps_cpus;
+static bool cfg_sink;
+static int cfg_type =		SOCK_STREAM;
+static int cfg_timeout_msec =	1000;
+static bool cfg_verbose;
+
+/* global vars */
+static int num_cpus;
+static int ring_block_nr;
+static int ring_block_sz;
+
+/* stats */
+static int frames_received;
+static int frames_nohash;
+static int frames_error;
+
+#define log_verbose(args...)	do { if (cfg_verbose) fprintf(stderr, args); } while (0)
+
+/* tpacket ring */
+struct ring_state {
+	int fd;
+	char *mmap;
+	int idx;
+	int cpu;
+};
+
+static unsigned int rx_irq_cpus[RSS_MAX_CPUS];	/* map from rxq to cpu */
+static int rps_silo_to_cpu[RPS_MAX_CPUS];
+static unsigned char toeplitz_key[TOEPLITZ_KEY_MAX_LEN];
+static struct ring_state rings[RSS_MAX_CPUS];
+
+static inline uint32_t toeplitz(const unsigned char *four_tuple,
+				const unsigned char *key)
+{
+	int i, bit, ret = 0;
+	uint32_t key32;
+
+	key32 = ntohl(*((uint32_t *)key));
+	key += 4;
+
+	for (i = 0; i < FOUR_TUPLE_MAX_LEN; i++) {
+		for (bit = 7; bit >= 0; bit--) {
+			if (four_tuple[i] & (1 << bit))
+				ret ^= key32;
+
+			key32 <<= 1;
+			key32 |= !!(key[0] & (1 << bit));
+		}
+		key++;
+	}
+
+	return ret;
+}
+
+/* Compare computed cpu with arrival cpu from packet_fanout_cpu */
+static void verify_rss(uint32_t rx_hash, int cpu)
+{
+	int queue = rx_hash % cfg_num_queues;
+
+	log_verbose(" rxq %d (cpu %d)", queue, rx_irq_cpus[queue]);
+	if (rx_irq_cpus[queue] != cpu) {
+		log_verbose(". error: rss cpu mismatch (%d)", cpu);
+		frames_error++;
+	}
+}
+
+static void verify_rps(uint64_t rx_hash, int cpu)
+{
+	int silo = (rx_hash * cfg_num_rps_cpus) >> 32;
+
+	log_verbose(" silo %d (cpu %d)", silo, rps_silo_to_cpu[silo]);
+	if (rps_silo_to_cpu[silo] != cpu) {
+		log_verbose(". error: rps cpu mismatch (%d)", cpu);
+		frames_error++;
+	}
+}
+
+static void log_rxhash(int cpu, uint32_t rx_hash,
+		       const char *addrs, int addr_len)
+{
+	char saddr[INET6_ADDRSTRLEN], daddr[INET6_ADDRSTRLEN];
+	uint16_t *ports;
+
+	if (!inet_ntop(cfg_family, addrs, saddr, sizeof(saddr)) ||
+	    !inet_ntop(cfg_family, addrs + addr_len, daddr, sizeof(daddr)))
+		error(1, 0, "address parse error");
+
+	ports = (void *)addrs + (addr_len * 2);
+	log_verbose("cpu %d: rx_hash 0x%08x [saddr %s daddr %s sport %02hu dport %02hu]",
+		    cpu, rx_hash, saddr, daddr,
+		    ntohs(ports[0]), ntohs(ports[1]));
+}
+
+/* Compare computed rxhash with rxhash received from tpacket_v3 */
+static void verify_rxhash(const char *pkt, uint32_t rx_hash, int cpu)
+{
+	unsigned char four_tuple[FOUR_TUPLE_MAX_LEN] = {0};
+	uint32_t rx_hash_sw;
+	const char *addrs;
+	int addr_len;
+
+	if (cfg_family == AF_INET) {
+		addr_len = sizeof(struct in_addr);
+		addrs = pkt + offsetof(struct iphdr, saddr);
+	} else {
+		addr_len = sizeof(struct in6_addr);
+		addrs = pkt + offsetof(struct ip6_hdr, ip6_src);
+	}
+
+	memcpy(four_tuple, addrs, (addr_len * 2) + (sizeof(uint16_t) * 2));
+	rx_hash_sw = toeplitz(four_tuple, toeplitz_key);
+
+	if (cfg_verbose)
+		log_rxhash(cpu, rx_hash, addrs, addr_len);
+
+	if (rx_hash != rx_hash_sw) {
+		log_verbose(" != expected 0x%x\n", rx_hash_sw);
+		frames_error++;
+		return;
+	}
+
+	log_verbose(" OK");
+	if (cfg_num_queues)
+		verify_rss(rx_hash, cpu);
+	else if (cfg_num_rps_cpus)
+		verify_rps(rx_hash, cpu);
+	log_verbose("\n");
+}
+
+static char *recv_frame(const struct ring_state *ring, char *frame)
+{
+	struct tpacket3_hdr *hdr = (void *)frame;
+
+	if (hdr->hv1.tp_rxhash)
+		verify_rxhash(frame + hdr->tp_net, hdr->hv1.tp_rxhash,
+			      ring->cpu);
+	else
+		frames_nohash++;
+
+	return frame + hdr->tp_next_offset;
+}
+
+/* A single TPACKET_V3 block can hold multiple frames */
+static void recv_block(struct ring_state *ring)
+{
+	struct tpacket_block_desc *block;
+	char *frame;
+	int i;
+
+	block = (void *)(ring->mmap + ring->idx * ring_block_sz);
+	if (!(block->hdr.bh1.block_status & TP_STATUS_USER))
+		return;
+
+	frame = (char *)block;
+	frame += block->hdr.bh1.offset_to_first_pkt;
+
+	for (i = 0; i < block->hdr.bh1.num_pkts; i++) {
+		frame = recv_frame(ring, frame);
+		frames_received++;
+	}
+
+	block->hdr.bh1.block_status = TP_STATUS_KERNEL;
+	ring->idx = (ring->idx + 1) % ring_block_nr;
+}
+
+/* simple test: sleep once unconditionally and then process all rings */
+static void process_rings(void)
+{
+	int i;
+
+	usleep(1000 * cfg_timeout_msec);
+
+	for (i = 0; i < num_cpus; i++)
+		recv_block(&rings[i]);
+
+	fprintf(stderr, "count: pass=%u nohash=%u fail=%u\n",
+		frames_received - frames_nohash - frames_error,
+		frames_nohash, frames_error);
+}
+
+static char *setup_ring(int fd)
+{
+	struct tpacket_req3 req3 = {0};
+	void *ring;
+
+	req3.tp_retire_blk_tov = cfg_timeout_msec;
+	req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
+
+	req3.tp_frame_size = 2048;
+	req3.tp_frame_nr = 1 << 10;
+	req3.tp_block_nr = 2;
+
+	req3.tp_block_size = req3.tp_frame_size * req3.tp_frame_nr;
+	req3.tp_block_size /= req3.tp_block_nr;
+
+	if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req3, sizeof(req3)))
+		error(1, errno, "setsockopt PACKET_RX_RING");
+
+	ring_block_sz = req3.tp_block_size;
+	ring_block_nr = req3.tp_block_nr;
+
+	ring = mmap(0, req3.tp_block_size * req3.tp_block_nr,
+		    PROT_READ | PROT_WRITE,
+		    MAP_SHARED | MAP_LOCKED | MAP_POPULATE, fd, 0);
+	if (ring == MAP_FAILED)
+		error(1, 0, "mmap failed");
+
+	return ring;
+}
+
+static void __set_filter(int fd, int off_proto, uint8_t proto, int off_dport)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_LD  + BPF_B   + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE),
+		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4),
+		BPF_STMT(BPF_LD  + BPF_B   + BPF_ABS, off_proto),
+		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, proto, 0, 2),
+		BPF_STMT(BPF_LD  + BPF_H   + BPF_ABS, off_dport),
+		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_dport, 1, 0),
+		BPF_STMT(BPF_RET + BPF_K, 0),
+		BPF_STMT(BPF_RET + BPF_K, 0xFFFF),
+	};
+	struct sock_fprog prog = {};
+
+	prog.filter = filter;
+	prog.len = sizeof(filter) / sizeof(struct sock_filter);
+	if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)))
+		error(1, errno, "setsockopt filter");
+}
+
+/* filter on transport protocol and destination port */
+static void set_filter(int fd)
+{
+	const int off_dport = offsetof(struct tcphdr, dest);	/* same for udp */
+	uint8_t proto;
+
+	proto = cfg_type == SOCK_STREAM ? IPPROTO_TCP : IPPROTO_UDP;
+	if (cfg_family == AF_INET)
+		__set_filter(fd, offsetof(struct iphdr, protocol), proto,
+			     sizeof(struct iphdr) + off_dport);
+	else
+		__set_filter(fd, offsetof(struct ip6_hdr, ip6_nxt), proto,
+			     sizeof(struct ip6_hdr) + off_dport);
+}
+
+/* drop everything: used temporarily during setup */
+static void set_filter_null(int fd)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_RET + BPF_K, 0),
+	};
+	struct sock_fprog prog = {};
+
+	prog.filter = filter;
+	prog.len = sizeof(filter) / sizeof(struct sock_filter);
+	if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)))
+		error(1, errno, "setsockopt filter");
+}
+
+static int create_ring(char **ring)
+{
+	struct fanout_args args = {
+		.id = 1,
+		.type_flags = PACKET_FANOUT_CPU,
+		.max_num_members = RSS_MAX_CPUS
+	};
+	struct sockaddr_ll ll = { 0 };
+	int fd, val;
+
+	fd = socket(PF_PACKET, SOCK_DGRAM, 0);
+	if (fd == -1)
+		error(1, errno, "socket creation failed");
+
+	val = TPACKET_V3;
+	if (setsockopt(fd, SOL_PACKET, PACKET_VERSION, &val, sizeof(val)))
+		error(1, errno, "setsockopt PACKET_VERSION");
+	*ring = setup_ring(fd);
+
+	/* block packets until all rings are added to the fanout group:
+	 * else packets can arrive during setup and get misclassified
+	 */
+	set_filter_null(fd);
+
+	ll.sll_family = AF_PACKET;
+	ll.sll_ifindex = if_nametoindex(cfg_ifname);
+	ll.sll_protocol = cfg_family == AF_INET ? htons(ETH_P_IP) :
+						  htons(ETH_P_IPV6);
+	if (bind(fd, (void *)&ll, sizeof(ll)))
+		error(1, errno, "bind");
+
+	/* must come after bind: verifies all programs in group match */
+	if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &args, sizeof(args))) {
+		/* on failure, retry using old API if that is sufficient:
+		 * it has a hard limit of 256 sockets, so only try if
+		 * (a) only testing rxhash, not RSS or (b) <= 256 cpus.
+		 * in this API, the third argument is left implicit.
+		 */
+		if (cfg_num_queues || num_cpus > 256 ||
+		    setsockopt(fd, SOL_PACKET, PACKET_FANOUT,
+			       &args, sizeof(uint32_t)))
+			error(1, errno, "setsockopt PACKET_FANOUT cpu");
+	}
+
+	return fd;
+}
+
+/* setup inet(6) socket to blackhole the test traffic, if arg '-s' */
+static int setup_sink(void)
+{
+	int fd, val;
+
+	fd = socket(cfg_family, cfg_type, 0);
+	if (fd == -1)
+		error(1, errno, "socket %d.%d", cfg_family, cfg_type);
+
+	val = 1 << 20;
+	if (setsockopt(fd, SOL_SOCKET, SO_RCVBUFFORCE, &val, sizeof(val)))
+		error(1, errno, "setsockopt rcvbuf");
+
+	return fd;
+}
+
+static void setup_rings(void)
+{
+	int i;
+
+	for (i = 0; i < num_cpus; i++) {
+		rings[i].cpu = i;
+		rings[i].fd = create_ring(&rings[i].mmap);
+	}
+
+	/* accept packets once all rings in the fanout group are up */
+	for (i = 0; i < num_cpus; i++)
+		set_filter(rings[i].fd);
+}
+
+static void cleanup_rings(void)
+{
+	int i;
+
+	for (i = 0; i < num_cpus; i++) {
+		if (munmap(rings[i].mmap, ring_block_nr * ring_block_sz))
+			error(1, errno, "munmap");
+		if (close(rings[i].fd))
+			error(1, errno, "close");
+	}
+}
+
+static void parse_cpulist(const char *arg)
+{
+	do {
+		rx_irq_cpus[cfg_num_queues++] = strtol(arg, NULL, 10);
+
+		arg = strchr(arg, ',');
+		if (!arg)
+			break;
+		arg++;			// skip ','
+	} while (1);
+}
+
+static void show_cpulist(void)
+{
+	int i;
+
+	for (i = 0; i < cfg_num_queues; i++)
+		fprintf(stderr, "rxq %d: cpu %d\n", i, rx_irq_cpus[i]);
+}
+
+static void show_silos(void)
+{
+	int i;
+
+	for (i = 0; i < cfg_num_rps_cpus; i++)
+		fprintf(stderr, "silo %d: cpu %d\n", i, rps_silo_to_cpu[i]);
+}
+
+static void parse_toeplitz_key(const char *str, int slen, unsigned char *key)
+{
+	int i, ret, off;
+
+	if (slen < TOEPLITZ_STR_MIN_LEN ||
+	    slen > TOEPLITZ_STR_MAX_LEN + 1)
+		error(1, 0, "invalid toeplitz key");
+
+	for (i = 0, off = 0; off < slen; i++, off += 3) {
+		ret = sscanf(str + off, "%hhx", &key[i]);
+		if (ret != 1)
+			error(1, 0, "key parse error at %d off %d len %d",
+			      i, off, slen);
+	}
+}
+
+static void parse_rps_bitmap(const char *arg)
+{
+	unsigned long bitmap;
+	int i;
+
+	bitmap = strtoul(arg, NULL, 0);
+
+	if (bitmap & ~(RPS_MAX_CPUS - 1))
+		error(1, 0, "rps bitmap 0x%lx out of bounds 0..%lu",
+		      bitmap, RPS_MAX_CPUS - 1);
+
+	for (i = 0; i < RPS_MAX_CPUS; i++)
+		if (bitmap & 1UL << i)
+			rps_silo_to_cpu[cfg_num_rps_cpus++] = i;
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	static struct option long_options[] = {
+	    {"dport",	required_argument, 0, 'd'},
+	    {"cpus",	required_argument, 0, 'C'},
+	    {"key",	required_argument, 0, 'k'},
+	    {"iface",	required_argument, 0, 'i'},
+	    {"ipv4",	no_argument, 0, '4'},
+	    {"ipv6",	no_argument, 0, '6'},
+	    {"sink",	no_argument, 0, 's'},
+	    {"tcp",	no_argument, 0, 't'},
+	    {"timeout",	required_argument, 0, 'T'},
+	    {"udp",	no_argument, 0, 'u'},
+	    {"verbose",	no_argument, 0, 'v'},
+	    {"rps",	required_argument, 0, 'r'},
+	    {0, 0, 0, 0}
+	};
+	bool have_toeplitz = false;
+	int index, c;
+
+	while ((c = getopt_long(argc, argv, "46C:d:i:k:r:stT:u:v", long_options, &index)) != -1) {
+		switch (c) {
+		case '4':
+			cfg_family = AF_INET;
+			break;
+		case '6':
+			cfg_family = AF_INET6;
+			break;
+		case 'C':
+			parse_cpulist(optarg);
+			break;
+		case 'd':
+			cfg_dport = strtol(optarg, NULL, 0);
+			break;
+		case 'i':
+			cfg_ifname = optarg;
+			break;
+		case 'k':
+			parse_toeplitz_key(optarg, strlen(optarg),
+					   toeplitz_key);
+			have_toeplitz = true;
+			break;
+		case 'r':
+			parse_rps_bitmap(optarg);
+			break;
+		case 's':
+			cfg_sink = true;
+			break;
+		case 't':
+			cfg_type = SOCK_STREAM;
+			break;
+		case 'T':
+			cfg_timeout_msec = strtol(optarg, NULL, 0);
+			break;
+		case 'u':
+			cfg_type = SOCK_DGRAM;
+			break;
+		case 'v':
+			cfg_verbose = true;
+			break;
+
+		default:
+			error(1, 0, "unknown option %c", optopt);
+			break;
+		}
+	}
+
+	if (!have_toeplitz)
+		error(1, 0, "Must supply rss key ('-k')");
+
+	num_cpus = get_nprocs();
+	if (num_cpus > RSS_MAX_CPUS)
+		error(1, 0, "increase RSS_MAX_CPUS");
+
+	if (cfg_num_queues && cfg_num_rps_cpus)
+		error(1, 0,
+		      "Can't supply both RSS cpus ('-C') and RPS map ('-r')");
+	if (cfg_verbose) {
+		show_cpulist();
+		show_silos();
+	}
+}
+
+int main(int argc, char **argv)
+{
+	const int min_tests = 10;
+	int fd_sink = -1;
+
+	parse_opts(argc, argv);
+
+	if (cfg_sink)
+		fd_sink = setup_sink();
+
+	setup_rings();
+	process_rings();
+	cleanup_rings();
+
+	if (cfg_sink && close(fd_sink))
+		error(1, errno, "close sink");
+
+	if (frames_received - frames_nohash < min_tests)
+		error(1, 0, "too few frames for verification");
+
+	return frames_error;
+}
diff --git a/tools/testing/selftests/net/toeplitz.sh b/tools/testing/selftests/net/toeplitz.sh
new file mode 100755
index 000000000000..0a49907cd4fe
--- /dev/null
+++ b/tools/testing/selftests/net/toeplitz.sh
@@ -0,0 +1,199 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# extended toeplitz test: test rxhash plus, optionally, either (1) rss mapping
+# from rxhash to rx queue ('-rss') or (2) rps mapping from rxhash to cpu
+# ('-rps <rps_map>')
+#
+# irq-pattern-prefix can be derived from /sys/kernel/irq/*/action,
+# which is a driver-specific encoding.
+#
+# invoke as ./toeplitz.sh (-i <iface>) -u|-t -4|-6 \
+# [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)]
+
+source setup_loopback.sh
+readonly SERVER_IP4="192.168.1.200/24"
+readonly SERVER_IP6="fda8::1/64"
+readonly SERVER_MAC="aa:00:00:00:00:02"
+
+readonly CLIENT_IP4="192.168.1.100/24"
+readonly CLIENT_IP6="fda8::2/64"
+readonly CLIENT_MAC="aa:00:00:00:00:01"
+
+PORT=8000
+KEY="$(</proc/sys/net/core/netdev_rss_key)"
+TEST_RSS=false
+RPS_MAP=""
+PROTO_FLAG=""
+IP_FLAG=""
+DEV="eth0"
+
+# Return the number of rxqs among which RSS is configured to spread packets.
+# This is determined by reading the RSS indirection table using ethtool.
+get_rss_cfg_num_rxqs() {
+	echo $(ethtool -x "${DEV}" |
+		egrep [[:space:]]+[0-9]+:[[:space:]]+ |
+		cut -d: -f2- |
+		awk '{$1=$1};1' |
+		tr ' ' '\n' |
+		sort -u |
+		wc -l)
+}
+
+# Return a list of the receive irq handler cpus.
+# The list is ordered by the irqs, so first rxq-0 cpu, then rxq-1 cpu, etc.
+# Reads /sys/kernel/irq/ in order, so algorithm depends on
+# irq_{rxq-0} < irq_{rxq-1}, etc.
+get_rx_irq_cpus() {
+	CPUS=""
+	# sort so that irq 2 is read before irq 10
+	SORTED_IRQS=$(for i in /sys/kernel/irq/*; do echo $i; done | sort -V)
+	# Consider only as many queues as RSS actually uses. We assume that
+	# if RSS_CFG_NUM_RXQS=N, then RSS uses rxqs 0-(N-1).
+	RSS_CFG_NUM_RXQS=$(get_rss_cfg_num_rxqs)
+	RXQ_COUNT=0
+
+	for i in ${SORTED_IRQS}
+	do
+		[[ "${RXQ_COUNT}" -lt "${RSS_CFG_NUM_RXQS}" ]] || break
+		# lookup relevant IRQs by action name
+		[[ -e "$i/actions" ]] || continue
+		cat "$i/actions" | grep -q "${IRQ_PATTERN}" || continue
+		irqname=$(<"$i/actions")
+
+		# does the IRQ get called
+		irqcount=$(cat "$i/per_cpu_count" | tr -d '0,')
+		[[ -n "${irqcount}" ]] || continue
+
+		# lookup CPU
+		irq=$(basename "$i")
+		cpu=$(cat "/proc/irq/$irq/smp_affinity_list")
+
+		if [[ -z "${CPUS}" ]]; then
+			CPUS="${cpu}"
+		else
+			CPUS="${CPUS},${cpu}"
+		fi
+		RXQ_COUNT=$((RXQ_COUNT+1))
+	done
+
+	echo "${CPUS}"
+}
+
+get_disable_rfs_cmd() {
+	echo "echo 0 > /proc/sys/net/core/rps_sock_flow_entries;"
+}
+
+get_set_rps_bitmaps_cmd() {
+	CMD=""
+	for i in /sys/class/net/${DEV}/queues/rx-*/rps_cpus
+	do
+		CMD="${CMD} echo $1 > ${i};"
+	done
+
+	echo "${CMD}"
+}
+
+get_disable_rps_cmd() {
+	echo "$(get_set_rps_bitmaps_cmd 0)"
+}
+
+die() {
+	echo "$1"
+	exit 1
+}
+
+check_nic_rxhash_enabled() {
+	local -r pattern="receive-hashing:\ on"
+
+	ethtool -k "${DEV}" | grep -q "${pattern}" || die "rxhash must be enabled"
+}
+
+parse_opts() {
+	local prog=$0
+	shift 1
+
+	while [[ "$1" =~ "-" ]]; do
+		if [[ "$1" = "-irq_prefix" ]]; then
+			shift
+			IRQ_PATTERN="^$1-[0-9]*$"
+		elif [[ "$1" = "-u" || "$1" = "-t" ]]; then
+			PROTO_FLAG="$1"
+		elif [[ "$1" = "-4" ]]; then
+			IP_FLAG="$1"
+			SERVER_IP="${SERVER_IP4}"
+			CLIENT_IP="${CLIENT_IP4}"
+		elif [[ "$1" = "-6" ]]; then
+			IP_FLAG="$1"
+			SERVER_IP="${SERVER_IP6}"
+			CLIENT_IP="${CLIENT_IP6}"
+		elif [[ "$1" = "-rss" ]]; then
+			TEST_RSS=true
+		elif [[ "$1" = "-rps" ]]; then
+			shift
+			RPS_MAP="$1"
+		elif [[ "$1" = "-i" ]]; then
+			shift
+			DEV="$1"
+		else
+			die "Usage: ${prog} (-i <iface>) -u|-t -4|-6 \
+			     [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)]"
+		fi
+		shift
+	done
+}
+
+setup() {
+	setup_loopback_environment "${DEV}"
+
+	# Set up server_ns namespace and client_ns namespace
+	setup_macvlan_ns "${DEV}" server_ns server \
+	"${SERVER_MAC}" "${SERVER_IP}"
+	setup_macvlan_ns "${DEV}" client_ns client \
+	"${CLIENT_MAC}" "${CLIENT_IP}"
+}
+
+cleanup() {
+	cleanup_macvlan_ns server_ns server client_ns client
+	cleanup_loopback "${DEV}"
+}
+
+parse_opts $0 $@
+
+setup
+trap cleanup EXIT
+
+check_nic_rxhash_enabled
+
+# Actual test starts here
+if [[ "${TEST_RSS}" = true ]]; then
+	# RPS/RFS must be disabled because they move packets between cpus,
+	# which breaks the PACKET_FANOUT_CPU identification of RSS decisions.
+	eval "$(get_disable_rfs_cmd) $(get_disable_rps_cmd)" \
+	  ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
+	  -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \
+	  -C "$(get_rx_irq_cpus)" -s -v &
+elif [[ ! -z "${RPS_MAP}" ]]; then
+	eval "$(get_disable_rfs_cmd) $(get_set_rps_bitmaps_cmd ${RPS_MAP})" \
+	  ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
+	  -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \
+	  -r "0x${RPS_MAP}" -s -v &
+else
+	ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
+	  -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 -s -v &
+fi
+
+server_pid=$!
+
+ip netns exec client_ns ./toeplitz_client.sh "${PROTO_FLAG}" \
+  "${IP_FLAG}" "${SERVER_IP%%/*}" "${PORT}" &
+
+client_pid=$!
+
+wait "${server_pid}"
+exit_code=$?
+kill -9 "${client_pid}"
+if [[ "${exit_code}" -eq 0 ]]; then
+	echo "Test Succeeded!"
+fi
+exit "${exit_code}"
diff --git a/tools/testing/selftests/net/toeplitz_client.sh b/tools/testing/selftests/net/toeplitz_client.sh
new file mode 100755
index 000000000000..2fef34f4aba1
--- /dev/null
+++ b/tools/testing/selftests/net/toeplitz_client.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# A simple program for generating traffic for the toeplitz test.
+#
+# This program sends packets periodically for, conservatively, 20 seconds. The
+# intent is for the calling program to kill this program once it is no longer
+# needed, rather than waiting for the 20 second expiration.
+
+send_traffic() {
+	expiration=$((SECONDS+20))
+	while [[ "${SECONDS}" -lt "${expiration}" ]]
+	do
+		if [[ "${PROTO}" == "-u" ]]; then
+			echo "msg $i" | nc "${IPVER}" -u -w 0 "${ADDR}" "${PORT}"
+		else
+			echo "msg $i" | nc "${IPVER}" -w 0 "${ADDR}" "${PORT}"
+		fi
+		sleep 0.001
+	done
+}
+
+PROTO=$1
+IPVER=$2
+ADDR=$3
+PORT=$4
+
+send_traffic
diff --git a/tools/testing/selftests/net/unicast_extensions.sh b/tools/testing/selftests/net/unicast_extensions.sh
index 66354cdd5ce4..2d10ccac898a 100755
--- a/tools/testing/selftests/net/unicast_extensions.sh
+++ b/tools/testing/selftests/net/unicast_extensions.sh
@@ -28,12 +28,15 @@
 # These tests provide an easy way to flip the expected result of any
 # of these behaviors for testing kernel patches that change them.
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 # nettest can be run from PATH or from same directory as this selftest
 if ! which nettest >/dev/null; then
 	PATH=$PWD:$PATH
 	if ! which nettest >/dev/null; then
 		echo "'nettest' command not found; skipping tests"
-		exit 0
+		exit $ksft_skip
 	fi
 fi
 
diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh
index 11d7cdb898c0..19eac3e44c06 100755
--- a/tools/testing/selftests/net/veth.sh
+++ b/tools/testing/selftests/net/veth.sh
@@ -13,7 +13,7 @@ readonly NS_DST=$BASE$DST
 readonly BM_NET_V4=192.168.1.
 readonly BM_NET_V6=2001:db8::
 
-readonly NPROCS=`nproc`
+readonly CPUS=`nproc`
 ret=0
 
 cleanup() {
@@ -75,6 +75,29 @@ chk_tso_flag() {
 	__chk_flag "$1" $2 $3 tcp-segmentation-offload
 }
 
+chk_channels() {
+	local msg="$1"
+	local target=$2
+	local rx=$3
+	local tx=$4
+
+	local dev=veth$target
+
+	local cur_rx=`ip netns exec $BASE$target ethtool -l $dev |\
+		grep RX: | tail -n 1 | awk '{print $2}' `
+		local cur_tx=`ip netns exec $BASE$target ethtool -l $dev |\
+		grep TX: | tail -n 1 | awk '{print $2}'`
+	local cur_combined=`ip netns exec $BASE$target ethtool -l $dev |\
+		grep Combined: | tail -n 1 | awk '{print $2}'`
+
+	printf "%-60s" "$msg"
+	if [ "$cur_rx" = "$rx" -a "$cur_tx" = "$tx" -a "$cur_combined" = "n/a" ]; then
+		echo " ok "
+	else
+		echo " fail rx:$rx:$cur_rx tx:$tx:$cur_tx combined:n/a:$cur_combined"
+	fi
+}
+
 chk_gro() {
 	local msg="$1"
 	local expected=$2
@@ -107,11 +130,100 @@ chk_gro() {
 	fi
 }
 
+__change_channels()
+{
+	local cur_cpu
+	local end=$1
+	local cur
+	local i
+
+	while true; do
+		printf -v cur '%(%s)T'
+		[ $cur -le $end ] || break
+
+		for i in `seq 1 $CPUS`; do
+			ip netns exec $NS_SRC ethtool -L veth$SRC rx $i tx $i
+			ip netns exec $NS_DST ethtool -L veth$DST rx $i tx $i
+		done
+
+		for i in `seq 1 $((CPUS - 1))`; do
+			cur_cpu=$((CPUS - $i))
+			ip netns exec $NS_SRC ethtool -L veth$SRC rx $cur_cpu tx $cur_cpu
+			ip netns exec $NS_DST ethtool -L veth$DST rx $cur_cpu tx $cur_cpu
+		done
+	done
+}
+
+__send_data() {
+	local end=$1
+
+	while true; do
+		printf -v cur '%(%s)T'
+		[ $cur -le $end ] || break
+
+		ip netns exec $NS_SRC ./udpgso_bench_tx -4 -s 1000 -M 300 -D $BM_NET_V4$DST
+	done
+}
+
+do_stress() {
+	local end
+	printf -v end '%(%s)T'
+	end=$((end + $STRESS))
+
+	ip netns exec $NS_SRC ethtool -L veth$SRC rx 3 tx 3
+	ip netns exec $NS_DST ethtool -L veth$DST rx 3 tx 3
+
+	ip netns exec $NS_DST ./udpgso_bench_rx &
+	local rx_pid=$!
+
+	echo "Running stress test for $STRESS seconds..."
+	__change_channels $end &
+	local ch_pid=$!
+	__send_data $end &
+	local data_pid_1=$!
+	__send_data $end &
+	local data_pid_2=$!
+	__send_data $end &
+	local data_pid_3=$!
+	__send_data $end &
+	local data_pid_4=$!
+
+	wait $ch_pid $data_pid_1 $data_pid_2 $data_pid_3 $data_pid_4
+	kill -9 $rx_pid
+	echo "done"
+
+	# restore previous setting
+	ip netns exec $NS_SRC ethtool -L veth$SRC rx 2 tx 2
+	ip netns exec $NS_DST ethtool -L veth$DST rx 2 tx 1
+}
+
+usage() {
+	echo "Usage: $0 [-h] [-s <seconds>]"
+	echo -e "\t-h: show this help"
+	echo -e "\t-s: run optional stress tests for the given amount of seconds"
+}
+
+STRESS=0
+while getopts "hs:" option; do
+	case "$option" in
+	"h")
+		usage $0
+		exit 0
+		;;
+	"s")
+		STRESS=$OPTARG
+		;;
+	esac
+done
+
 if [ ! -f ../bpf/xdp_dummy.o ]; then
 	echo "Missing xdp_dummy helper. Build bpf selftest first"
 	exit 1
 fi
 
+[ $CPUS -lt 2 ] && echo "Only one CPU available, some tests will be skipped"
+[ $STRESS -gt 0 -a $CPUS -lt 3 ] && echo " stress test will be skipped, too"
+
 create_ns
 chk_gro_flag "default - gro flag" $SRC off
 chk_gro_flag "        - peer gro flag" $DST off
@@ -134,6 +246,8 @@ chk_gro "        - aggregation with TSO off" 1
 cleanup
 
 create_ns
+chk_channels "default channels" $DST 1 1
+
 ip -n $NS_DST link set dev veth$DST down
 ip netns exec $NS_DST ethtool -K veth$DST gro on
 chk_gro_flag "with gro enabled on link down - gro flag" $DST on
@@ -147,6 +261,56 @@ chk_gro "        - aggregation with TSO off" 1
 cleanup
 
 create_ns
+
+CUR_TX=1
+CUR_RX=1
+if [ $CPUS -gt 1 ]; then
+	ip netns exec $NS_DST ethtool -L veth$DST tx 2
+	chk_channels "setting tx channels" $DST 1 2
+	CUR_TX=2
+fi
+
+if [ $CPUS -gt 2 ]; then
+	ip netns exec $NS_DST ethtool -L veth$DST rx 3 tx 3
+	chk_channels "setting both rx and tx channels" $DST 3 3
+	CUR_RX=3
+	CUR_TX=3
+fi
+
+ip netns exec $NS_DST ethtool -L veth$DST combined 2 2>/dev/null
+chk_channels "bad setting: combined channels" $DST $CUR_RX $CUR_TX
+
+ip netns exec $NS_DST ethtool -L veth$DST tx $((CPUS + 1)) 2>/dev/null
+chk_channels "setting invalid channels nr" $DST $CUR_RX $CUR_TX
+
+if [ $CPUS -gt 1 ]; then
+	# this also tests queues nr reduction
+	ip netns exec $NS_DST ethtool -L veth$DST rx 1 tx 2 2>/dev/null
+	ip netns exec $NS_SRC ethtool -L veth$SRC rx 1 tx 2 2>/dev/null
+	printf "%-60s" "bad setting: XDP with RX nr less than TX"
+	ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o \
+		section xdp_dummy 2>/dev/null &&\
+		echo "fail - set operation successful ?!?" || echo " ok "
+
+	# the following tests will run with multiple channels active
+	ip netns exec $NS_SRC ethtool -L veth$SRC rx 2
+	ip netns exec $NS_DST ethtool -L veth$DST rx 2
+	ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o \
+		section xdp_dummy 2>/dev/null
+	printf "%-60s" "bad setting: reducing RX nr below peer TX with XDP set"
+	ip netns exec $NS_DST ethtool -L veth$DST rx 1 2>/dev/null &&\
+		echo "fail - set operation successful ?!?" || echo " ok "
+	CUR_RX=2
+	CUR_TX=2
+fi
+
+if [ $CPUS -gt 2 ]; then
+	printf "%-60s" "bad setting: increasing peer TX nr above RX with XDP set"
+	ip netns exec $NS_SRC ethtool -L veth$SRC tx 3 2>/dev/null &&\
+		echo "fail - set operation successful ?!?" || echo " ok "
+	chk_channels "setting invalid channels nr" $DST 2 2
+fi
+
 ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o section xdp_dummy 2>/dev/null
 chk_gro_flag "with xdp attached - gro flag" $DST on
 chk_gro_flag "        - peer gro flag" $SRC off
@@ -167,10 +331,27 @@ chk_gro_flag "        - after gro on xdp off, gro flag" $DST on
 chk_gro_flag "        - peer gro flag" $SRC off
 chk_tso_flag "        - tso flag" $SRC on
 chk_tso_flag "        - peer tso flag" $DST on
+
+if [ $CPUS -gt 1 ]; then
+	ip netns exec $NS_DST ethtool -L veth$DST tx 1
+	chk_channels "decreasing tx channels with device down" $DST 2 1
+fi
+
 ip -n $NS_DST link set dev veth$DST up
 ip -n $NS_SRC link set dev veth$SRC up
 chk_gro "        - aggregation" 1
 
+if [ $CPUS -gt 1 ]; then
+	[ $STRESS -gt 0 -a $CPUS -gt 2 ] && do_stress
+
+	ip -n $NS_DST link set dev veth$DST down
+	ip -n $NS_SRC link set dev veth$SRC down
+	ip netns exec $NS_DST ethtool -L veth$DST tx 2
+	chk_channels "increasing tx channels with device down" $DST 2 2
+	ip -n $NS_DST link set dev veth$DST up
+	ip -n $NS_SRC link set dev veth$SRC up
+fi
+
 ip netns exec $NS_DST ethtool -K veth$DST gro off
 ip netns exec $NS_SRC ethtool -K veth$SRC tx-udp-segmentation off
 chk_gro "aggregation again with default and TSO off" 10
diff --git a/tools/testing/selftests/net/vrf_strict_mode_test.sh b/tools/testing/selftests/net/vrf_strict_mode_test.sh
index 18b982d611de..865d53c1781c 100755
--- a/tools/testing/selftests/net/vrf_strict_mode_test.sh
+++ b/tools/testing/selftests/net/vrf_strict_mode_test.sh
@@ -3,6 +3,9 @@
 
 # This test is designed for testing the new VRF strict_mode functionality.
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 ret=0
 
 # identifies the "init" network namespace which is often called root network
@@ -371,18 +374,18 @@ vrf_strict_mode_check_support()
 
 if [ "$(id -u)" -ne 0 ];then
 	echo "SKIP: Need root privileges"
-	exit 0
+	exit $ksft_skip
 fi
 
 if [ ! -x "$(command -v ip)" ]; then
 	echo "SKIP: Could not run test without ip tool"
-	exit 0
+	exit $ksft_skip
 fi
 
 modprobe vrf &>/dev/null
 if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
 	echo "SKIP: vrf sysctl does not exist"
-	exit 0
+	exit $ksft_skip
 fi
 
 cleanup &> /dev/null