aboutsummaryrefslogtreecommitdiffstats
path: root/tools/testing/selftests/net
diff options
context:
space:
mode:
Diffstat (limited to 'tools/testing/selftests/net')
-rw-r--r--tools/testing/selftests/net/Makefile6
-rw-r--r--tools/testing/selftests/net/af_unix/Makefile5
-rw-r--r--tools/testing/selftests/net/af_unix/test_unix_oob.c437
-rw-r--r--tools/testing/selftests/net/config1
-rwxr-xr-xtools/testing/selftests/net/fcnal-test.sh33
-rwxr-xr-xtools/testing/selftests/net/fib_rule_tests.sh7
-rw-r--r--tools/testing/selftests/net/forwarding/devlink_lib.sh15
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh27
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_mpath_nh.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_mpath_nh_res.sh2
-rwxr-xr-xtools/testing/selftests/net/gre_gso.sh236
-rw-r--r--tools/testing/selftests/net/gro.c1095
-rwxr-xr-xtools/testing/selftests/net/gro.sh99
-rwxr-xr-xtools/testing/selftests/net/ioam6.sh652
-rw-r--r--tools/testing/selftests/net/ioam6_parser.c720
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh345
-rw-r--r--tools/testing/selftests/net/mptcp/pm_nl_ctl.c16
-rwxr-xr-xtools/testing/selftests/net/mptcp/simult_flows.sh4
-rw-r--r--tools/testing/selftests/net/psock_fanout.c4
-rwxr-xr-xtools/testing/selftests/net/psock_snd.sh3
-rwxr-xr-xtools/testing/selftests/net/run_afpackettests5
-rwxr-xr-xtools/testing/selftests/net/setup_loopback.sh118
-rw-r--r--tools/testing/selftests/net/setup_veth.sh41
-rwxr-xr-xtools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh9
-rwxr-xr-xtools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh9
-rwxr-xr-xtools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh9
-rw-r--r--tools/testing/selftests/net/toeplitz.c585
-rwxr-xr-xtools/testing/selftests/net/toeplitz.sh199
-rwxr-xr-xtools/testing/selftests/net/toeplitz_client.sh28
-rwxr-xr-xtools/testing/selftests/net/unicast_extensions.sh5
-rwxr-xr-xtools/testing/selftests/net/veth.sh183
-rwxr-xr-xtools/testing/selftests/net/vrf_strict_mode_test.sh9
32 files changed, 4826 insertions, 83 deletions
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 79c9eb0034d5..492b273743b4 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -25,6 +25,9 @@ TEST_PROGS += bareudp.sh
TEST_PROGS += unicast_extensions.sh
TEST_PROGS += udpgro_fwd.sh
TEST_PROGS += veth.sh
+TEST_PROGS += ioam6.sh
+TEST_PROGS += gro.sh
+TEST_PROGS += gre_gso.sh
TEST_PROGS_EXTENDED := in_netns.sh
TEST_GEN_FILES = socket nettest
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
@@ -36,8 +39,11 @@ TEST_GEN_FILES += fin_ack_lat
TEST_GEN_FILES += reuseaddr_ports_exhausted
TEST_GEN_FILES += hwtstamp_config rxtimestamp timestamping txtimestamp
TEST_GEN_FILES += ipsec
+TEST_GEN_FILES += ioam6_parser
+TEST_GEN_FILES += gro
TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
+TEST_GEN_FILES += toeplitz
TEST_FILES := settings
diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile
new file mode 100644
index 000000000000..cfc7f4f97fd1
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/Makefile
@@ -0,0 +1,5 @@
+##TEST_GEN_FILES := test_unix_oob
+TEST_PROGS := test_unix_oob
+include ../../lib.mk
+
+all: $(TEST_PROGS)
diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c
new file mode 100644
index 000000000000..0f3e3763f4f8
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/test_unix_oob.c
@@ -0,0 +1,437 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+#include <netinet/tcp.h>
+#include <sys/un.h>
+#include <sys/signal.h>
+#include <sys/poll.h>
+
+static int pipefd[2];
+static int signal_recvd;
+static pid_t producer_id;
+static char sock_name[32];
+
+static void sig_hand(int sn, siginfo_t *si, void *p)
+{
+ signal_recvd = sn;
+}
+
+static int set_sig_handler(int signal)
+{
+ struct sigaction sa;
+
+ sa.sa_sigaction = sig_hand;
+ sigemptyset(&sa.sa_mask);
+ sa.sa_flags = SA_SIGINFO | SA_RESTART;
+
+ return sigaction(signal, &sa, NULL);
+}
+
+static void set_filemode(int fd, int set)
+{
+ int flags = fcntl(fd, F_GETFL, 0);
+
+ if (set)
+ flags &= ~O_NONBLOCK;
+ else
+ flags |= O_NONBLOCK;
+ fcntl(fd, F_SETFL, flags);
+}
+
+static void signal_producer(int fd)
+{
+ char cmd;
+
+ cmd = 'S';
+ write(fd, &cmd, sizeof(cmd));
+}
+
+static void wait_for_signal(int fd)
+{
+ char buf[5];
+
+ read(fd, buf, 5);
+}
+
+static void die(int status)
+{
+ fflush(NULL);
+ unlink(sock_name);
+ kill(producer_id, SIGTERM);
+ exit(status);
+}
+
+int is_sioctatmark(int fd)
+{
+ int ans = -1;
+
+ if (ioctl(fd, SIOCATMARK, &ans, sizeof(ans)) < 0) {
+#ifdef DEBUG
+ perror("SIOCATMARK Failed");
+#endif
+ }
+ return ans;
+}
+
+void read_oob(int fd, char *c)
+{
+
+ *c = ' ';
+ if (recv(fd, c, sizeof(*c), MSG_OOB) < 0) {
+#ifdef DEBUG
+ perror("Reading MSG_OOB Failed");
+#endif
+ }
+}
+
+int read_data(int pfd, char *buf, int size)
+{
+ int len = 0;
+
+ memset(buf, size, '0');
+ len = read(pfd, buf, size);
+#ifdef DEBUG
+ if (len < 0)
+ perror("read failed");
+#endif
+ return len;
+}
+
+static void wait_for_data(int pfd, int event)
+{
+ struct pollfd pfds[1];
+
+ pfds[0].fd = pfd;
+ pfds[0].events = event;
+ poll(pfds, 1, -1);
+}
+
+void producer(struct sockaddr_un *consumer_addr)
+{
+ int cfd;
+ char buf[64];
+ int i;
+
+ memset(buf, 'x', sizeof(buf));
+ cfd = socket(AF_UNIX, SOCK_STREAM, 0);
+
+ wait_for_signal(pipefd[0]);
+ if (connect(cfd, (struct sockaddr *)consumer_addr,
+ sizeof(struct sockaddr)) != 0) {
+ perror("Connect failed");
+ kill(0, SIGTERM);
+ exit(1);
+ }
+
+ for (i = 0; i < 2; i++) {
+ /* Test 1: Test for SIGURG and OOB */
+ wait_for_signal(pipefd[0]);
+ memset(buf, 'x', sizeof(buf));
+ buf[63] = '@';
+ send(cfd, buf, sizeof(buf), MSG_OOB);
+
+ wait_for_signal(pipefd[0]);
+
+ /* Test 2: Test for OOB being overwitten */
+ memset(buf, 'x', sizeof(buf));
+ buf[63] = '%';
+ send(cfd, buf, sizeof(buf), MSG_OOB);
+
+ memset(buf, 'x', sizeof(buf));
+ buf[63] = '#';
+ send(cfd, buf, sizeof(buf), MSG_OOB);
+
+ wait_for_signal(pipefd[0]);
+
+ /* Test 3: Test for SIOCATMARK */
+ memset(buf, 'x', sizeof(buf));
+ buf[63] = '@';
+ send(cfd, buf, sizeof(buf), MSG_OOB);
+
+ memset(buf, 'x', sizeof(buf));
+ buf[63] = '%';
+ send(cfd, buf, sizeof(buf), MSG_OOB);
+
+ memset(buf, 'x', sizeof(buf));
+ send(cfd, buf, sizeof(buf), 0);
+
+ wait_for_signal(pipefd[0]);
+
+ /* Test 4: Test for 1byte OOB msg */
+ memset(buf, 'x', sizeof(buf));
+ buf[0] = '@';
+ send(cfd, buf, 1, MSG_OOB);
+ }
+}
+
+int
+main(int argc, char **argv)
+{
+ int lfd, pfd;
+ struct sockaddr_un consumer_addr, paddr;
+ socklen_t len = sizeof(consumer_addr);
+ char buf[1024];
+ int on = 0;
+ char oob;
+ int flags;
+ int atmark;
+ char *tmp_file;
+
+ lfd = socket(AF_UNIX, SOCK_STREAM, 0);
+ memset(&consumer_addr, 0, sizeof(consumer_addr));
+ consumer_addr.sun_family = AF_UNIX;
+ sprintf(sock_name, "unix_oob_%d", getpid());
+ unlink(sock_name);
+ strcpy(consumer_addr.sun_path, sock_name);
+
+ if ((bind(lfd, (struct sockaddr *)&consumer_addr,
+ sizeof(consumer_addr))) != 0) {
+ perror("socket bind failed");
+ exit(1);
+ }
+
+ pipe(pipefd);
+
+ listen(lfd, 1);
+
+ producer_id = fork();
+ if (producer_id == 0) {
+ producer(&consumer_addr);
+ exit(0);
+ }
+
+ set_sig_handler(SIGURG);
+ signal_producer(pipefd[1]);
+
+ pfd = accept(lfd, (struct sockaddr *) &paddr, &len);
+ fcntl(pfd, F_SETOWN, getpid());
+
+ signal_recvd = 0;
+ signal_producer(pipefd[1]);
+
+ /* Test 1:
+ * veriyf that SIGURG is
+ * delivered and 63 bytes are
+ * read and oob is '@'
+ */
+ wait_for_data(pfd, POLLIN | POLLPRI);
+ read_oob(pfd, &oob);
+ len = read_data(pfd, buf, 1024);
+ if (!signal_recvd || len != 63 || oob != '@') {
+ fprintf(stderr, "Test 1 failed sigurg %d len %d %c\n",
+ signal_recvd, len, oob);
+ die(1);
+ }
+
+ signal_recvd = 0;
+ signal_producer(pipefd[1]);
+
+ /* Test 2:
+ * Verify that the first OOB is over written by
+ * the 2nd one and the first OOB is returned as
+ * part of the read, and sigurg is received.
+ */
+ wait_for_data(pfd, POLLIN | POLLPRI);
+ len = 0;
+ while (len < 70)
+ len = recv(pfd, buf, 1024, MSG_PEEK);
+ len = read_data(pfd, buf, 1024);
+ read_oob(pfd, &oob);
+ if (!signal_recvd || len != 127 || oob != '#') {
+ fprintf(stderr, "Test 2 failed, sigurg %d len %d OOB %c\n",
+ signal_recvd, len, oob);
+ die(1);
+ }
+
+ signal_recvd = 0;
+ signal_producer(pipefd[1]);
+
+ /* Test 3:
+ * verify that 2nd oob over writes
+ * the first one and read breaks at
+ * oob boundary returning 127 bytes
+ * and sigurg is received and atmark
+ * is set.
+ * oob is '%' and second read returns
+ * 64 bytes.
+ */
+ len = 0;
+ wait_for_data(pfd, POLLIN | POLLPRI);
+ while (len < 150)
+ len = recv(pfd, buf, 1024, MSG_PEEK);
+ len = read_data(pfd, buf, 1024);
+ atmark = is_sioctatmark(pfd);
+ read_oob(pfd, &oob);
+
+ if (!signal_recvd || len != 127 || oob != '%' || atmark != 1) {
+ fprintf(stderr, "Test 3 failed, sigurg %d len %d OOB %c ",
+ "atmark %d\n", signal_recvd, len, oob, atmark);
+ die(1);
+ }
+
+ signal_recvd = 0;
+
+ len = read_data(pfd, buf, 1024);
+ if (len != 64) {
+ fprintf(stderr, "Test 3.1 failed, sigurg %d len %d OOB %c\n",
+ signal_recvd, len, oob);
+ die(1);
+ }
+
+ signal_recvd = 0;
+ signal_producer(pipefd[1]);
+
+ /* Test 4:
+ * verify that a single byte
+ * oob message is delivered.
+ * set non blocking mode and
+ * check proper error is
+ * returned and sigurg is
+ * received and correct
+ * oob is read.
+ */
+
+ set_filemode(pfd, 0);
+
+ wait_for_data(pfd, POLLIN | POLLPRI);
+ len = read_data(pfd, buf, 1024);
+ if ((len == -1) && (errno == 11))
+ len = 0;
+
+ read_oob(pfd, &oob);
+
+ if (!signal_recvd || len != 0 || oob != '@') {
+ fprintf(stderr, "Test 4 failed, sigurg %d len %d OOB %c\n",
+ signal_recvd, len, oob);
+ die(1);
+ }
+
+ set_filemode(pfd, 1);
+
+ /* Inline Testing */
+
+ on = 1;
+ if (setsockopt(pfd, SOL_SOCKET, SO_OOBINLINE, &on, sizeof(on))) {
+ perror("SO_OOBINLINE");
+ die(1);
+ }
+
+ signal_recvd = 0;
+ signal_producer(pipefd[1]);
+
+ /* Test 1 -- Inline:
+ * Check that SIGURG is
+ * delivered and 63 bytes are
+ * read and oob is '@'
+ */
+
+ wait_for_data(pfd, POLLIN | POLLPRI);
+ len = read_data(pfd, buf, 1024);
+
+ if (!signal_recvd || len != 63) {
+ fprintf(stderr, "Test 1 Inline failed, sigurg %d len %d\n",
+ signal_recvd, len);
+ die(1);
+ }
+
+ len = read_data(pfd, buf, 1024);
+
+ if (len != 1) {
+ fprintf(stderr,
+ "Test 1.1 Inline failed, sigurg %d len %d oob %c\n",
+ signal_recvd, len, oob);
+ die(1);
+ }
+
+ signal_recvd = 0;
+ signal_producer(pipefd[1]);
+
+ /* Test 2 -- Inline:
+ * Verify that the first OOB is over written by
+ * the 2nd one and read breaks correctly on
+ * 2nd OOB boundary with the first OOB returned as
+ * part of the read, and sigurg is delivered and
+ * siocatmark returns true.
+ * next read returns one byte, the oob byte
+ * and siocatmark returns false.
+ */
+ len = 0;
+ wait_for_data(pfd, POLLIN | POLLPRI);
+ while (len < 70)
+ len = recv(pfd, buf, 1024, MSG_PEEK);
+ len = read_data(pfd, buf, 1024);
+ atmark = is_sioctatmark(pfd);
+ if (len != 127 || atmark != 1 || !signal_recvd) {
+ fprintf(stderr, "Test 2 Inline failed, len %d atmark %d\n",
+ len, atmark);
+ die(1);
+ }
+
+ len = read_data(pfd, buf, 1024);
+ atmark = is_sioctatmark(pfd);
+ if (len != 1 || buf[0] != '#' || atmark == 1) {
+ fprintf(stderr, "Test 2.1 Inline failed, len %d data %c atmark %d\n",
+ len, buf[0], atmark);
+ die(1);
+ }
+
+ signal_recvd = 0;
+ signal_producer(pipefd[1]);
+
+ /* Test 3 -- Inline:
+ * verify that 2nd oob over writes
+ * the first one and read breaks at
+ * oob boundary returning 127 bytes
+ * and sigurg is received and siocatmark
+ * is true after the read.
+ * subsequent read returns 65 bytes
+ * because of oob which should be '%'.
+ */
+ len = 0;
+ wait_for_data(pfd, POLLIN | POLLPRI);
+ while (len < 126)
+ len = recv(pfd, buf, 1024, MSG_PEEK);
+ len = read_data(pfd, buf, 1024);
+ atmark = is_sioctatmark(pfd);
+ if (!signal_recvd || len != 127 || !atmark) {
+ fprintf(stderr,
+ "Test 3 Inline failed, sigurg %d len %d data %c\n",
+ signal_recvd, len, buf[0]);
+ die(1);
+ }
+
+ len = read_data(pfd, buf, 1024);
+ atmark = is_sioctatmark(pfd);
+ if (len != 65 || buf[0] != '%' || atmark != 0) {
+ fprintf(stderr,
+ "Test 3.1 Inline failed, len %d oob %c atmark %d\n",
+ len, buf[0], atmark);
+ die(1);
+ }
+
+ signal_recvd = 0;
+ signal_producer(pipefd[1]);
+
+ /* Test 4 -- Inline:
+ * verify that a single
+ * byte oob message is delivered
+ * and read returns one byte, the oob
+ * byte and sigurg is received
+ */
+ wait_for_data(pfd, POLLIN | POLLPRI);
+ len = read_data(pfd, buf, 1024);
+ if (!signal_recvd || len != 1 || buf[0] != '@') {
+ fprintf(stderr,
+ "Test 4 Inline failed, signal %d len %d data %c\n",
+ signal_recvd, len, buf[0]);
+ die(1);
+ }
+ die(0);
+}
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 6f905b53904f..21b646d10b88 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -42,3 +42,4 @@ CONFIG_NET_CLS_FLOWER=m
CONFIG_NET_ACT_TUNNEL_KEY=m
CONFIG_NET_ACT_MIRRED=m
CONFIG_BAREUDP=m
+CONFIG_IPV6_IOAM6_LWTUNNEL=y
diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh
index a8ad92850e63..13350cd5c8ac 100755
--- a/tools/testing/selftests/net/fcnal-test.sh
+++ b/tools/testing/selftests/net/fcnal-test.sh
@@ -37,6 +37,9 @@
#
# server / client nomenclature relative to ns-A
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
VERBOSE=0
NSA_DEV=eth1
@@ -3879,6 +3882,32 @@ use_case_ping_lla_multi()
log_test_addr ${MCAST}%${NSC_DEV} $? 0 "Post cycle ${NSA} ${NSA_DEV2}, ping out ns-C"
}
+# Perform IPv{4,6} SNAT on ns-A, and verify TCP connection is successfully
+# established with ns-B.
+use_case_snat_on_vrf()
+{
+ setup "yes"
+
+ local port="12345"
+
+ run_cmd iptables -t nat -A POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP} -o ${VRF}
+ run_cmd ip6tables -t nat -A POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP6} -o ${VRF}
+
+ run_cmd_nsb nettest -s -l ${NSB_IP} -p ${port} &
+ sleep 1
+ run_cmd nettest -d ${VRF} -r ${NSB_IP} -p ${port}
+ log_test $? 0 "IPv4 TCP connection over VRF with SNAT"
+
+ run_cmd_nsb nettest -6 -s -l ${NSB_IP6} -p ${port} &
+ sleep 1
+ run_cmd nettest -6 -d ${VRF} -r ${NSB_IP6} -p ${port}
+ log_test $? 0 "IPv6 TCP connection over VRF with SNAT"
+
+ # Cleanup
+ run_cmd iptables -t nat -D POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP} -o ${VRF}
+ run_cmd ip6tables -t nat -D POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP6} -o ${VRF}
+}
+
use_cases()
{
log_section "Use cases"
@@ -3886,6 +3915,8 @@ use_cases()
use_case_br
log_subsection "Ping LLA with multiple interfaces"
use_case_ping_lla_multi
+ log_subsection "SNAT on VRF"
+ use_case_snat_on_vrf
}
################################################################################
@@ -3946,7 +3977,7 @@ fi
which nettest >/dev/null
if [ $? -ne 0 ]; then
echo "'nettest' command not found; skipping tests"
- exit 0
+ exit $ksft_skip
fi
declare -i nfail=0
diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
index a93e6b690e06..43ea8407a82e 100755
--- a/tools/testing/selftests/net/fib_rule_tests.sh
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -3,6 +3,9 @@
# This test is for checking IPv4 and IPv6 FIB rules API
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
ret=0
PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
@@ -238,12 +241,12 @@ run_fibrule_tests()
if [ "$(id -u)" -ne 0 ];then
echo "SKIP: Need root privileges"
- exit 0
+ exit $ksft_skip
fi
if [ ! -x "$(command -v ip)" ]; then
echo "SKIP: Could not run test without ip tool"
- exit 0
+ exit $ksft_skip
fi
# start clean
diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh
index 13d3d4428a32..2c14a86adaaa 100644
--- a/tools/testing/selftests/net/forwarding/devlink_lib.sh
+++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh
@@ -1,6 +1,9 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
##############################################################################
# Defines
@@ -9,11 +12,11 @@ if [[ ! -v DEVLINK_DEV ]]; then
| jq -r '.port | keys[]' | cut -d/ -f-2)
if [ -z "$DEVLINK_DEV" ]; then
echo "SKIP: ${NETIFS[p1]} has no devlink device registered for it"
- exit 1
+ exit $ksft_skip
fi
if [[ "$(echo $DEVLINK_DEV | grep -c pci)" -eq 0 ]]; then
echo "SKIP: devlink device's bus is not PCI"
- exit 1
+ exit $ksft_skip
fi
DEVLINK_VIDDID=$(lspci -s $(echo $DEVLINK_DEV | cut -d"/" -f2) \
@@ -22,7 +25,7 @@ elif [[ ! -z "$DEVLINK_DEV" ]]; then
devlink dev show $DEVLINK_DEV &> /dev/null
if [ $? -ne 0 ]; then
echo "SKIP: devlink device \"$DEVLINK_DEV\" not found"
- exit 1
+ exit $ksft_skip
fi
fi
@@ -32,19 +35,19 @@ fi
devlink help 2>&1 | grep resource &> /dev/null
if [ $? -ne 0 ]; then
echo "SKIP: iproute2 too old, missing devlink resource support"
- exit 1
+ exit $ksft_skip
fi
devlink help 2>&1 | grep trap &> /dev/null
if [ $? -ne 0 ]; then
echo "SKIP: iproute2 too old, missing devlink trap support"
- exit 1
+ exit $ksft_skip
fi
devlink dev help 2>&1 | grep info &> /dev/null
if [ $? -ne 0 ]; then
echo "SKIP: iproute2 too old, missing devlink dev info support"
- exit 1
+ exit $ksft_skip
fi
##############################################################################
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 42e28c983d41..e7fc5c35b569 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -4,6 +4,9 @@
##############################################################################
# Defines
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
# Can be overridden by the configuration file.
PING=${PING:=ping}
PING6=${PING6:=ping6}
@@ -38,7 +41,7 @@ check_tc_version()
tc -j &> /dev/null
if [[ $? -ne 0 ]]; then
echo "SKIP: iproute2 too old; tc is missing JSON support"
- exit 1
+ exit $ksft_skip
fi
}
@@ -51,7 +54,7 @@ check_tc_mpls_support()
matchall action pipe &> /dev/null
if [[ $? -ne 0 ]]; then
echo "SKIP: iproute2 too old; tc is missing MPLS support"
- return 1
+ return $ksft_skip
fi
tc filter del dev $dev ingress protocol mpls_uc pref 1 handle 1 \
matchall
@@ -69,7 +72,7 @@ check_tc_mpls_lse_stats()
if [[ $? -ne 0 ]]; then
echo "SKIP: iproute2 too old; tc-flower is missing extended MPLS support"
- return 1
+ return $ksft_skip
fi
tc -j filter show dev $dev ingress protocol mpls_uc | jq . &> /dev/null
@@ -79,7 +82,7 @@ check_tc_mpls_lse_stats()
if [[ $ret -ne 0 ]]; then
echo "SKIP: iproute2 too old; tc-flower produces invalid json output for extended MPLS filters"
- return 1
+ return $ksft_skip
fi
}
@@ -88,7 +91,7 @@ check_tc_shblock_support()
tc filter help 2>&1 | grep block &> /dev/null
if [[ $? -ne 0 ]]; then
echo "SKIP: iproute2 too old; tc is missing shared block support"
- exit 1
+ exit $ksft_skip
fi
}
@@ -97,7 +100,7 @@ check_tc_chain_support()
tc help 2>&1|grep chain &> /dev/null
if [[ $? -ne 0 ]]; then
echo "SKIP: iproute2 too old; tc is missing chain support"
- exit 1
+ exit $ksft_skip
fi
}
@@ -106,7 +109,7 @@ check_tc_action_hw_stats_support()
tc actions help 2>&1 | grep -q hw_stats
if [[ $? -ne 0 ]]; then
echo "SKIP: iproute2 too old; tc is missing action hw_stats support"
- exit 1
+ exit $ksft_skip
fi
}
@@ -115,13 +118,13 @@ check_ethtool_lanes_support()
ethtool --help 2>&1| grep lanes &> /dev/null
if [[ $? -ne 0 ]]; then
echo "SKIP: ethtool too old; it is missing lanes support"
- exit 1
+ exit $ksft_skip
fi
}
if [[ "$(id -u)" -ne 0 ]]; then
echo "SKIP: need root privileges"
- exit 0
+ exit $ksft_skip
fi
if [[ "$CHECK_TC" = "yes" ]]; then
@@ -134,7 +137,7 @@ require_command()
if [[ ! -x "$(command -v "$cmd")" ]]; then
echo "SKIP: $cmd not installed"
- exit 1
+ exit $ksft_skip
fi
}
@@ -143,7 +146,7 @@ require_command $MZ
if [[ ! -v NUM_NETIFS ]]; then
echo "SKIP: importer does not define \"NUM_NETIFS\""
- exit 1
+ exit $ksft_skip
fi
##############################################################################
@@ -203,7 +206,7 @@ for ((i = 1; i <= NUM_NETIFS; ++i)); do
ip link show dev ${NETIFS[p$i]} &> /dev/null
if [[ $? -ne 0 ]]; then
echo "SKIP: could not find all required interfaces"
- exit 1
+ exit $ksft_skip
fi
done
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
index 76efb1f8375e..a0d612e04990 100755
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
@@ -411,7 +411,7 @@ ping_ipv6()
ip nexthop ls >/dev/null 2>&1
if [ $? -ne 0 ]; then
echo "Nexthop objects not supported; skipping tests"
- exit 0
+ exit $ksft_skip
fi
trap cleanup EXIT
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
index 4898dd4118f1..cb08ffe2356a 100755
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
@@ -386,7 +386,7 @@ ping_ipv6()
ip nexthop ls >/dev/null 2>&1
if [ $? -ne 0 ]; then
echo "Nexthop objects not supported; skipping tests"
- exit 0
+ exit $ksft_skip
fi
trap cleanup EXIT
diff --git a/tools/testing/selftests/net/gre_gso.sh b/tools/testing/selftests/net/gre_gso.sh
new file mode 100755
index 000000000000..facbb0c80443
--- /dev/null
+++ b/tools/testing/selftests/net/gre_gso.sh
@@ -0,0 +1,236 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is for checking GRE GSO.
+
+ret=0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# all tests in this script. Can be overridden with -t option
+TESTS="gre_gso"
+
+VERBOSE=0
+PAUSE_ON_FAIL=no
+PAUSE=no
+IP="ip -netns ns1"
+NS_EXEC="ip netns exec ns1"
+TMPFILE=`mktemp`
+PID=
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ printf " TEST: %-60s [ OK ]\n" "${msg}"
+ nsuccess=$((nsuccess+1))
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf " TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+
+ if [ "${PAUSE}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+}
+
+setup()
+{
+ set -e
+ ip netns add ns1
+ ip netns set ns1 auto
+ $IP link set dev lo up
+
+ ip link add veth0 type veth peer name veth1
+ ip link set veth0 up
+ ip link set veth1 netns ns1
+ $IP link set veth1 name veth0
+ $IP link set veth0 up
+
+ dd if=/dev/urandom of=$TMPFILE bs=1024 count=2048 &>/dev/null
+ set +e
+}
+
+cleanup()
+{
+ rm -rf $TMPFILE
+ [ -n "$PID" ] && kill $PID
+ ip link del dev gre1 &> /dev/null
+ ip link del dev veth0 &> /dev/null
+ ip netns del ns1
+}
+
+get_linklocal()
+{
+ local dev=$1
+ local ns=$2
+ local addr
+
+ [ -n "$ns" ] && ns="-netns $ns"
+
+ addr=$(ip -6 -br $ns addr show dev ${dev} | \
+ awk '{
+ for (i = 3; i <= NF; ++i) {
+ if ($i ~ /^fe80/)
+ print $i
+ }
+ }'
+ )
+ addr=${addr/\/*}
+
+ [ -z "$addr" ] && return 1
+
+ echo $addr
+
+ return 0
+}
+
+gre_create_tun()
+{
+ local a1=$1
+ local a2=$2
+ local mode
+
+ [[ $a1 =~ ^[0-9.]*$ ]] && mode=gre || mode=ip6gre
+
+ ip tunnel add gre1 mode $mode local $a1 remote $a2 dev veth0
+ ip link set gre1 up
+ $IP tunnel add gre1 mode $mode local $a2 remote $a1 dev veth0
+ $IP link set gre1 up
+}
+
+gre_gst_test_checks()
+{
+ local name=$1
+ local addr=$2
+
+ $NS_EXEC nc -kl $port >/dev/null &
+ PID=$!
+ while ! $NS_EXEC ss -ltn | grep -q $port; do ((i++)); sleep 0.01; done
+
+ cat $TMPFILE | timeout 1 nc $addr $port
+ log_test $? 0 "$name - copy file w/ TSO"
+
+ ethtool -K veth0 tso off
+
+ cat $TMPFILE | timeout 1 nc $addr $port
+ log_test $? 0 "$name - copy file w/ GSO"
+
+ ethtool -K veth0 tso on
+
+ kill $PID
+ PID=
+}
+
+gre6_gso_test()
+{
+ local port=7777
+
+ setup
+
+ a1=$(get_linklocal veth0)
+ a2=$(get_linklocal veth0 ns1)
+
+ gre_create_tun $a1 $a2
+
+ ip addr add 172.16.2.1/24 dev gre1
+ $IP addr add 172.16.2.2/24 dev gre1
+
+ ip -6 addr add 2001:db8:1::1/64 dev gre1 nodad
+ $IP -6 addr add 2001:db8:1::2/64 dev gre1 nodad
+
+ sleep 2
+
+ gre_gst_test_checks GREv6/v4 172.16.2.2
+ gre_gst_test_checks GREv6/v6 2001:db8:1::2
+
+ cleanup
+}
+
+gre_gso_test()
+{
+ gre6_gso_test
+}
+
+################################################################################
+# usage
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+
+ -t <test> Test(s) to run (default: all)
+ (options: $TESTS)
+ -p Pause on fail
+ -P Pause after each test before cleanup
+ -v verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
+# main
+
+while getopts :t:pPhv o
+do
+ case $o in
+ t) TESTS=$OPTARG;;
+ p) PAUSE_ON_FAIL=yes;;
+ P) PAUSE=yes;;
+ v) VERBOSE=$(($VERBOSE + 1));;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+PEER_CMD="ip netns exec ${PEER_NS}"
+
+# make sure we don't pause twice
+[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit $ksft_skip;
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v nc)" ]; then
+ echo "SKIP: Could not run test without nc tool"
+ exit $ksft_skip
+fi
+
+# start clean
+cleanup &> /dev/null
+
+for t in $TESTS
+do
+ case $t in
+ gre_gso) gre_gso_test;;
+
+ help) echo "Test names: $TESTS"; exit 0;;
+ esac
+done
+
+if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/gro.c
new file mode 100644
index 000000000000..cf37ce86b0fd
--- /dev/null
+++ b/tools/testing/selftests/net/gro.c
@@ -0,0 +1,1095 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This testsuite provides conformance testing for GRO coalescing.
+ *
+ * Test cases:
+ * 1.data
+ * Data packets of the same size and same header setup with correct
+ * sequence numbers coalesce. The one exception being the last data
+ * packet coalesced: it can be smaller than the rest and coalesced
+ * as long as it is in the same flow.
+ * 2.ack
+ * Pure ACK does not coalesce.
+ * 3.flags
+ * Specific test cases: no packets with PSH, SYN, URG, RST set will
+ * be coalesced.
+ * 4.tcp
+ * Packets with incorrect checksum, non-consecutive seqno and
+ * different TCP header options shouldn't coalesce. Nit: given that
+ * some extension headers have paddings, such as timestamp, headers
+ * that are padding differently would not be coalesced.
+ * 5.ip:
+ * Packets with different (ECN, TTL, TOS) header, ip options or
+ * ip fragments (ipv6) shouldn't coalesce.
+ * 6.large:
+ * Packets larger than GRO_MAX_SIZE packets shouldn't coalesce.
+ *
+ * MSS is defined as 4096 - header because if it is too small
+ * (i.e. 1500 MTU - header), it will result in many packets,
+ * increasing the "large" test case's flakiness. This is because
+ * due to time sensitivity in the coalescing window, the receiver
+ * may not coalesce all of the packets.
+ *
+ * Note the timing issue applies to all of the test cases, so some
+ * flakiness is to be expected.
+ *
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <getopt.h>
+#include <linux/filter.h>
+#include <linux/if_packet.h>
+#include <linux/ipv6.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <unistd.h>
+
+#define DPORT 8000
+#define SPORT 1500
+#define PAYLOAD_LEN 100
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#define NUM_PACKETS 4
+#define START_SEQ 100
+#define START_ACK 100
+#define SIP6 "fdaa::2"
+#define DIP6 "fdaa::1"
+#define SIP4 "192.168.1.200"
+#define DIP4 "192.168.1.100"
+#define ETH_P_NONE 0
+#define TOTAL_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
+#define MSS (4096 - sizeof(struct tcphdr) - sizeof(struct ipv6hdr))
+#define MAX_PAYLOAD (IP_MAXPACKET - sizeof(struct tcphdr) - sizeof(struct ipv6hdr))
+#define NUM_LARGE_PKT (MAX_PAYLOAD / MSS)
+#define MAX_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
+
+static int proto = -1;
+static uint8_t src_mac[ETH_ALEN], dst_mac[ETH_ALEN];
+static char *testname = "data";
+static char *ifname = "eth0";
+static char *smac = "aa:00:00:00:00:02";
+static char *dmac = "aa:00:00:00:00:01";
+static bool verbose;
+static bool tx_socket = true;
+static int tcp_offset = -1;
+static int total_hdr_len = -1;
+static int ethhdr_proto = -1;
+
+static void vlog(const char *fmt, ...)
+{
+ va_list args;
+
+ if (verbose) {
+ va_start(args, fmt);
+ vfprintf(stderr, fmt, args);
+ va_end(args);
+ }
+}
+
+static void setup_sock_filter(int fd)
+{
+ const int dport_off = tcp_offset + offsetof(struct tcphdr, dest);
+ const int ethproto_off = offsetof(struct ethhdr, h_proto);
+ int optlen = 0;
+ int ipproto_off;
+ int next_off;
+
+ if (proto == PF_INET)
+ next_off = offsetof(struct iphdr, protocol);
+ else
+ next_off = offsetof(struct ipv6hdr, nexthdr);
+ ipproto_off = ETH_HLEN + next_off;
+
+ if (strcmp(testname, "ip") == 0) {
+ if (proto == PF_INET)
+ optlen = sizeof(struct ip_timestamp);
+ else
+ optlen = sizeof(struct ip6_frag);
+ }
+
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, ethproto_off),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ntohs(ethhdr_proto), 0, 7),
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, ipproto_off),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 0, 5),
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, dport_off),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 2, 0),
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, dport_off + optlen),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 0, 1),
+ BPF_STMT(BPF_RET + BPF_K, 0xFFFFFFFF),
+ BPF_STMT(BPF_RET + BPF_K, 0),
+ };
+
+ struct sock_fprog bpf = {
+ .len = ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+
+ if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf, sizeof(bpf)) < 0)
+ error(1, errno, "error setting filter");
+}
+
+static uint32_t checksum_nofold(void *data, size_t len, uint32_t sum)
+{
+ uint16_t *words = data;
+ int i;
+
+ for (i = 0; i < len / 2; i++)
+ sum += words[i];
+ if (len & 1)
+ sum += ((char *)data)[len - 1];
+ return sum;
+}
+
+static uint16_t checksum_fold(void *data, size_t len, uint32_t sum)
+{
+ sum = checksum_nofold(data, len, sum);
+ while (sum > 0xFFFF)
+ sum = (sum & 0xFFFF) + (sum >> 16);
+ return ~sum;
+}
+
+static uint16_t tcp_checksum(void *buf, int payload_len)
+{
+ struct pseudo_header6 {
+ struct in6_addr saddr;
+ struct in6_addr daddr;
+ uint16_t protocol;
+ uint16_t payload_len;
+ } ph6;
+ struct pseudo_header4 {
+ struct in_addr saddr;
+ struct in_addr daddr;
+ uint16_t protocol;
+ uint16_t payload_len;
+ } ph4;
+ uint32_t sum = 0;
+
+ if (proto == PF_INET6) {
+ if (inet_pton(AF_INET6, SIP6, &ph6.saddr) != 1)
+ error(1, errno, "inet_pton6 source ip pseudo");
+ if (inet_pton(AF_INET6, DIP6, &ph6.daddr) != 1)
+ error(1, errno, "inet_pton6 dest ip pseudo");
+ ph6.protocol = htons(IPPROTO_TCP);
+ ph6.payload_len = htons(sizeof(struct tcphdr) + payload_len);
+
+ sum = checksum_nofold(&ph6, sizeof(ph6), 0);
+ } else if (proto == PF_INET) {
+ if (inet_pton(AF_INET, SIP4, &ph4.saddr) != 1)
+ error(1, errno, "inet_pton source ip pseudo");
+ if (inet_pton(AF_INET, DIP4, &ph4.daddr) != 1)
+ error(1, errno, "inet_pton dest ip pseudo");
+ ph4.protocol = htons(IPPROTO_TCP);
+ ph4.payload_len = htons(sizeof(struct tcphdr) + payload_len);
+
+ sum = checksum_nofold(&ph4, sizeof(ph4), 0);
+ }
+
+ return checksum_fold(buf, sizeof(struct tcphdr) + payload_len, sum);
+}
+
+static void read_MAC(uint8_t *mac_addr, char *mac)
+{
+ if (sscanf(mac, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
+ &mac_addr[0], &mac_addr[1], &mac_addr[2],
+ &mac_addr[3], &mac_addr[4], &mac_addr[5]) != 6)
+ error(1, 0, "sscanf");
+}
+
+static void fill_datalinklayer(void *buf)
+{
+ struct ethhdr *eth = buf;
+
+ memcpy(eth->h_dest, dst_mac, ETH_ALEN);
+ memcpy(eth->h_source, src_mac, ETH_ALEN);
+ eth->h_proto = ethhdr_proto;
+}
+
+static void fill_networklayer(void *buf, int payload_len)
+{
+ struct ipv6hdr *ip6h = buf;
+ struct iphdr *iph = buf;
+
+ if (proto == PF_INET6) {
+ memset(ip6h, 0, sizeof(*ip6h));
+
+ ip6h->version = 6;
+ ip6h->payload_len = htons(sizeof(struct tcphdr) + payload_len);
+ ip6h->nexthdr = IPPROTO_TCP;
+ ip6h->hop_limit = 8;
+ if (inet_pton(AF_INET6, SIP6, &ip6h->saddr) != 1)
+ error(1, errno, "inet_pton source ip6");
+ if (inet_pton(AF_INET6, DIP6, &ip6h->daddr) != 1)
+ error(1, errno, "inet_pton dest ip6");
+ } else if (proto == PF_INET) {
+ memset(iph, 0, sizeof(*iph));
+
+ iph->version = 4;
+ iph->ihl = 5;
+ iph->ttl = 8;
+ iph->protocol = IPPROTO_TCP;
+ iph->tot_len = htons(sizeof(struct tcphdr) +
+ payload_len + sizeof(struct iphdr));
+ iph->frag_off = htons(0x4000); /* DF = 1, MF = 0 */
+ if (inet_pton(AF_INET, SIP4, &iph->saddr) != 1)
+ error(1, errno, "inet_pton source ip");
+ if (inet_pton(AF_INET, DIP4, &iph->daddr) != 1)
+ error(1, errno, "inet_pton dest ip");
+ iph->check = checksum_fold(buf, sizeof(struct iphdr), 0);
+ }
+}
+
+static void fill_transportlayer(void *buf, int seq_offset, int ack_offset,
+ int payload_len, int fin)
+{
+ struct tcphdr *tcph = buf;
+
+ memset(tcph, 0, sizeof(*tcph));
+
+ tcph->source = htons(SPORT);
+ tcph->dest = htons(DPORT);
+ tcph->seq = ntohl(START_SEQ + seq_offset);
+ tcph->ack_seq = ntohl(START_ACK + ack_offset);
+ tcph->ack = 1;
+ tcph->fin = fin;
+ tcph->doff = 5;
+ tcph->window = htons(TCP_MAXWIN);
+ tcph->urg_ptr = 0;
+ tcph->check = tcp_checksum(tcph, payload_len);
+}
+
+static void write_packet(int fd, char *buf, int len, struct sockaddr_ll *daddr)
+{
+ int ret = -1;
+
+ ret = sendto(fd, buf, len, 0, (struct sockaddr *)daddr, sizeof(*daddr));
+ if (ret == -1)
+ error(1, errno, "sendto failure");
+ if (ret != len)
+ error(1, errno, "sendto wrong length");
+}
+
+static void create_packet(void *buf, int seq_offset, int ack_offset,
+ int payload_len, int fin)
+{
+ memset(buf, 0, total_hdr_len);
+ memset(buf + total_hdr_len, 'a', payload_len);
+ fill_transportlayer(buf + tcp_offset, seq_offset, ack_offset,
+ payload_len, fin);
+ fill_networklayer(buf + ETH_HLEN, payload_len);
+ fill_datalinklayer(buf);
+}
+
+/* send one extra flag, not first and not last pkt */
+static void send_flags(int fd, struct sockaddr_ll *daddr, int psh, int syn,
+ int rst, int urg)
+{
+ static char flag_buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ int payload_len, pkt_size, flag, i;
+ struct tcphdr *tcph;
+
+ payload_len = PAYLOAD_LEN * psh;
+ pkt_size = total_hdr_len + payload_len;
+ flag = NUM_PACKETS / 2;
+
+ create_packet(flag_buf, flag * payload_len, 0, payload_len, 0);
+
+ tcph = (struct tcphdr *)(flag_buf + tcp_offset);
+ tcph->psh = psh;
+ tcph->syn = syn;
+ tcph->rst = rst;
+ tcph->urg = urg;
+ tcph->check = 0;
+ tcph->check = tcp_checksum(tcph, payload_len);
+
+ for (i = 0; i < NUM_PACKETS + 1; i++) {
+ if (i == flag) {
+ write_packet(fd, flag_buf, pkt_size, daddr);
+ continue;
+ }
+ create_packet(buf, i * PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
+ }
+}
+
+/* Test for data of same length, smaller than previous
+ * and of different lengths
+ */
+static void send_data_pkts(int fd, struct sockaddr_ll *daddr,
+ int payload_len1, int payload_len2)
+{
+ static char buf[ETH_HLEN + IP_MAXPACKET];
+
+ create_packet(buf, 0, 0, payload_len1, 0);
+ write_packet(fd, buf, total_hdr_len + payload_len1, daddr);
+ create_packet(buf, payload_len1, 0, payload_len2, 0);
+ write_packet(fd, buf, total_hdr_len + payload_len2, daddr);
+}
+
+/* If incoming segments make tracked segment length exceed
+ * legal IP datagram length, do not coalesce
+ */
+static void send_large(int fd, struct sockaddr_ll *daddr, int remainder)
+{
+ static char pkts[NUM_LARGE_PKT][TOTAL_HDR_LEN + MSS];
+ static char last[TOTAL_HDR_LEN + MSS];
+ static char new_seg[TOTAL_HDR_LEN + MSS];
+ int i;
+
+ for (i = 0; i < NUM_LARGE_PKT; i++)
+ create_packet(pkts[i], i * MSS, 0, MSS, 0);
+ create_packet(last, NUM_LARGE_PKT * MSS, 0, remainder, 0);
+ create_packet(new_seg, (NUM_LARGE_PKT + 1) * MSS, 0, remainder, 0);
+
+ for (i = 0; i < NUM_LARGE_PKT; i++)
+ write_packet(fd, pkts[i], total_hdr_len + MSS, daddr);
+ write_packet(fd, last, total_hdr_len + remainder, daddr);
+ write_packet(fd, new_seg, total_hdr_len + remainder, daddr);
+}
+
+/* Pure acks and dup acks don't coalesce */
+static void send_ack(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[MAX_HDR_LEN];
+
+ create_packet(buf, 0, 0, 0, 0);
+ write_packet(fd, buf, total_hdr_len, daddr);
+ write_packet(fd, buf, total_hdr_len, daddr);
+ create_packet(buf, 0, 1, 0, 0);
+ write_packet(fd, buf, total_hdr_len, daddr);
+}
+
+static void recompute_packet(char *buf, char *no_ext, int extlen)
+{
+ struct tcphdr *tcphdr = (struct tcphdr *)(buf + tcp_offset);
+ struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
+ struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+
+ memmove(buf, no_ext, total_hdr_len);
+ memmove(buf + total_hdr_len + extlen,
+ no_ext + total_hdr_len, PAYLOAD_LEN);
+
+ tcphdr->doff = tcphdr->doff + (extlen / 4);
+ tcphdr->check = 0;
+ tcphdr->check = tcp_checksum(tcphdr, PAYLOAD_LEN + extlen);
+ if (proto == PF_INET) {
+ iph->tot_len = htons(ntohs(iph->tot_len) + extlen);
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+ } else {
+ ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen);
+ }
+}
+
+static void tcp_write_options(char *buf, int kind, int ts)
+{
+ struct tcp_option_ts {
+ uint8_t kind;
+ uint8_t len;
+ uint32_t tsval;
+ uint32_t tsecr;
+ } *opt_ts = (void *)buf;
+ struct tcp_option_window {
+ uint8_t kind;
+ uint8_t len;
+ uint8_t shift;
+ } *opt_window = (void *)buf;
+
+ switch (kind) {
+ case TCPOPT_NOP:
+ buf[0] = TCPOPT_NOP;
+ break;
+ case TCPOPT_WINDOW:
+ memset(opt_window, 0, sizeof(struct tcp_option_window));
+ opt_window->kind = TCPOPT_WINDOW;
+ opt_window->len = TCPOLEN_WINDOW;
+ opt_window->shift = 0;
+ break;
+ case TCPOPT_TIMESTAMP:
+ memset(opt_ts, 0, sizeof(struct tcp_option_ts));
+ opt_ts->kind = TCPOPT_TIMESTAMP;
+ opt_ts->len = TCPOLEN_TIMESTAMP;
+ opt_ts->tsval = ts;
+ opt_ts->tsecr = 0;
+ break;
+ default:
+ error(1, 0, "unimplemented TCP option");
+ break;
+ }
+}
+
+/* TCP with options is always a permutation of {TS, NOP, NOP}.
+ * Implement different orders to verify coalescing stops.
+ */
+static void add_standard_tcp_options(char *buf, char *no_ext, int ts, int order)
+{
+ switch (order) {
+ case 0:
+ tcp_write_options(buf + total_hdr_len, TCPOPT_NOP, 0);
+ tcp_write_options(buf + total_hdr_len + 1, TCPOPT_NOP, 0);
+ tcp_write_options(buf + total_hdr_len + 2 /* two NOP opts */,
+ TCPOPT_TIMESTAMP, ts);
+ break;
+ case 1:
+ tcp_write_options(buf + total_hdr_len, TCPOPT_NOP, 0);
+ tcp_write_options(buf + total_hdr_len + 1,
+ TCPOPT_TIMESTAMP, ts);
+ tcp_write_options(buf + total_hdr_len + 1 + TCPOLEN_TIMESTAMP,
+ TCPOPT_NOP, 0);
+ break;
+ case 2:
+ tcp_write_options(buf + total_hdr_len, TCPOPT_TIMESTAMP, ts);
+ tcp_write_options(buf + total_hdr_len + TCPOLEN_TIMESTAMP + 1,
+ TCPOPT_NOP, 0);
+ tcp_write_options(buf + total_hdr_len + TCPOLEN_TIMESTAMP + 2,
+ TCPOPT_NOP, 0);
+ break;
+ default:
+ error(1, 0, "unknown order");
+ break;
+ }
+ recompute_packet(buf, no_ext, TCPOLEN_TSTAMP_APPA);
+}
+
+/* Packets with invalid checksum don't coalesce. */
+static void send_changed_checksum(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset);
+ int pkt_size = total_hdr_len + PAYLOAD_LEN;
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ tcph->check = tcph->check - 1;
+ write_packet(fd, buf, pkt_size, daddr);
+}
+
+ /* Packets with non-consecutive sequence number don't coalesce.*/
+static void send_changed_seq(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset);
+ int pkt_size = total_hdr_len + PAYLOAD_LEN;
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ tcph->seq = ntohl(htonl(tcph->seq) + 1);
+ tcph->check = 0;
+ tcph->check = tcp_checksum(tcph, PAYLOAD_LEN);
+ write_packet(fd, buf, pkt_size, daddr);
+}
+
+ /* Packet with different timestamp option or different timestamps
+ * don't coalesce.
+ */
+static void send_changed_ts(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char extpkt[sizeof(buf) + TCPOLEN_TSTAMP_APPA];
+ int pkt_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA;
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ add_standard_tcp_options(extpkt, buf, 0, 0);
+ write_packet(fd, extpkt, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ add_standard_tcp_options(extpkt, buf, 0, 0);
+ write_packet(fd, extpkt, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+ add_standard_tcp_options(extpkt, buf, 100, 0);
+ write_packet(fd, extpkt, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0);
+ add_standard_tcp_options(extpkt, buf, 100, 1);
+ write_packet(fd, extpkt, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 4, 0, PAYLOAD_LEN, 0);
+ add_standard_tcp_options(extpkt, buf, 100, 2);
+ write_packet(fd, extpkt, pkt_size, daddr);
+}
+
+/* Packet with different tcp options don't coalesce. */
+static void send_diff_opt(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char extpkt1[sizeof(buf) + TCPOLEN_TSTAMP_APPA];
+ static char extpkt2[sizeof(buf) + TCPOLEN_MAXSEG];
+ int extpkt1_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA;
+ int extpkt2_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_MAXSEG;
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ add_standard_tcp_options(extpkt1, buf, 0, 0);
+ write_packet(fd, extpkt1, extpkt1_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ add_standard_tcp_options(extpkt1, buf, 0, 0);
+ write_packet(fd, extpkt1, extpkt1_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+ tcp_write_options(extpkt2 + MAX_HDR_LEN, TCPOPT_NOP, 0);
+ tcp_write_options(extpkt2 + MAX_HDR_LEN + 1, TCPOPT_WINDOW, 0);
+ recompute_packet(extpkt2, buf, TCPOLEN_WINDOW + 1);
+ write_packet(fd, extpkt2, extpkt2_size, daddr);
+}
+
+static void add_ipv4_ts_option(void *buf, void *optpkt)
+{
+ struct ip_timestamp *ts = (struct ip_timestamp *)(optpkt + tcp_offset);
+ int optlen = sizeof(struct ip_timestamp);
+ struct iphdr *iph;
+
+ if (optlen % 4)
+ error(1, 0, "ipv4 timestamp length is not a multiple of 4B");
+
+ ts->ipt_code = IPOPT_TS;
+ ts->ipt_len = optlen;
+ ts->ipt_ptr = 5;
+ ts->ipt_flg = IPOPT_TS_TSONLY;
+
+ memcpy(optpkt, buf, tcp_offset);
+ memcpy(optpkt + tcp_offset + optlen, buf + tcp_offset,
+ sizeof(struct tcphdr) + PAYLOAD_LEN);
+
+ iph = (struct iphdr *)(optpkt + ETH_HLEN);
+ iph->ihl = 5 + (optlen / 4);
+ iph->tot_len = htons(ntohs(iph->tot_len) + optlen);
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr) + optlen, 0);
+}
+
+/* IPv4 options shouldn't coalesce */
+static void send_ip_options(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char optpkt[sizeof(buf) + sizeof(struct ip_timestamp)];
+ int optlen = sizeof(struct ip_timestamp);
+ int pkt_size = total_hdr_len + PAYLOAD_LEN + optlen;
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0);
+ add_ipv4_ts_option(buf, optpkt);
+ write_packet(fd, optpkt, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
+}
+
+/* IPv4 fragments shouldn't coalesce */
+static void send_fragment4(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[IP_MAXPACKET];
+ struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+ int pkt_size = total_hdr_len + PAYLOAD_LEN;
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, pkt_size, daddr);
+
+ /* Once fragmented, packet would retain the total_len.
+ * Tcp header is prepared as if rest of data is in follow-up frags,
+ * but follow up frags aren't actually sent.
+ */
+ memset(buf + total_hdr_len, 'a', PAYLOAD_LEN * 2);
+ fill_transportlayer(buf + tcp_offset, PAYLOAD_LEN, 0, PAYLOAD_LEN * 2, 0);
+ fill_networklayer(buf + ETH_HLEN, PAYLOAD_LEN);
+ fill_datalinklayer(buf);
+
+ iph->frag_off = htons(0x6000); // DF = 1, MF = 1
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+ write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* IPv4 packets with different ttl don't coalesce.*/
+static void send_changed_ttl(int fd, struct sockaddr_ll *daddr)
+{
+ int pkt_size = total_hdr_len + PAYLOAD_LEN;
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ iph->ttl = 7;
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+ write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* Packets with different tos don't coalesce.*/
+static void send_changed_tos(int fd, struct sockaddr_ll *daddr)
+{
+ int pkt_size = total_hdr_len + PAYLOAD_LEN;
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+ struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ if (proto == PF_INET) {
+ iph->tos = 1;
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+ } else if (proto == PF_INET6) {
+ ip6h->priority = 0xf;
+ }
+ write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* Packets with different ECN don't coalesce.*/
+static void send_changed_ECN(int fd, struct sockaddr_ll *daddr)
+{
+ int pkt_size = total_hdr_len + PAYLOAD_LEN;
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ if (proto == PF_INET) {
+ buf[ETH_HLEN + 1] ^= 0x2; // ECN set to 10
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+ } else {
+ buf[ETH_HLEN + 1] ^= 0x20; // ECN set to 10
+ }
+ write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* IPv6 fragments and packets with extensions don't coalesce.*/
+static void send_fragment6(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char extpkt[MAX_HDR_LEN + PAYLOAD_LEN +
+ sizeof(struct ip6_frag)];
+ struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
+ struct ip6_frag *frag = (void *)(extpkt + tcp_offset);
+ int extlen = sizeof(struct ip6_frag);
+ int bufpkt_len = total_hdr_len + PAYLOAD_LEN;
+ int extpkt_len = bufpkt_len + extlen;
+ int i;
+
+ for (i = 0; i < 2; i++) {
+ create_packet(buf, PAYLOAD_LEN * i, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, bufpkt_len, daddr);
+ }
+
+ create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+ memset(extpkt, 0, extpkt_len);
+
+ ip6h->nexthdr = IPPROTO_FRAGMENT;
+ ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen);
+ frag->ip6f_nxt = IPPROTO_TCP;
+
+ memcpy(extpkt, buf, tcp_offset);
+ memcpy(extpkt + tcp_offset + extlen, buf + tcp_offset,
+ sizeof(struct tcphdr) + PAYLOAD_LEN);
+ write_packet(fd, extpkt, extpkt_len, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, bufpkt_len, daddr);
+}
+
+static void bind_packetsocket(int fd)
+{
+ struct sockaddr_ll daddr = {};
+
+ daddr.sll_family = AF_PACKET;
+ daddr.sll_protocol = ethhdr_proto;
+ daddr.sll_ifindex = if_nametoindex(ifname);
+ if (daddr.sll_ifindex == 0)
+ error(1, errno, "if_nametoindex");
+
+ if (bind(fd, (void *)&daddr, sizeof(daddr)) < 0)
+ error(1, errno, "could not bind socket");
+}
+
+static void set_timeout(int fd)
+{
+ struct timeval timeout;
+
+ timeout.tv_sec = 120;
+ timeout.tv_usec = 0;
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (char *)&timeout,
+ sizeof(timeout)) < 0)
+ error(1, errno, "cannot set timeout, setsockopt failed");
+}
+
+static void check_recv_pkts(int fd, int *correct_payload,
+ int correct_num_pkts)
+{
+ static char buffer[IP_MAXPACKET + ETH_HLEN + 1];
+ struct iphdr *iph = (struct iphdr *)(buffer + ETH_HLEN);
+ struct ipv6hdr *ip6h = (struct ipv6hdr *)(buffer + ETH_HLEN);
+ struct tcphdr *tcph;
+ bool bad_packet = false;
+ int tcp_ext_len = 0;
+ int ip_ext_len = 0;
+ int pkt_size = -1;
+ int data_len = 0;
+ int num_pkt = 0;
+ int i;
+
+ vlog("Expected {");
+ for (i = 0; i < correct_num_pkts; i++)
+ vlog("%d ", correct_payload[i]);
+ vlog("}, Total %d packets\nReceived {", correct_num_pkts);
+
+ while (1) {
+ pkt_size = recv(fd, buffer, IP_MAXPACKET + ETH_HLEN + 1, 0);
+ if (pkt_size < 0)
+ error(1, errno, "could not receive");
+
+ if (iph->version == 4)
+ ip_ext_len = (iph->ihl - 5) * 4;
+ else if (ip6h->version == 6 && ip6h->nexthdr != IPPROTO_TCP)
+ ip_ext_len = sizeof(struct ip6_frag);
+
+ tcph = (struct tcphdr *)(buffer + tcp_offset + ip_ext_len);
+
+ if (tcph->fin)
+ break;
+
+ tcp_ext_len = (tcph->doff - 5) * 4;
+ data_len = pkt_size - total_hdr_len - tcp_ext_len - ip_ext_len;
+ /* Min ethernet frame payload is 46(ETH_ZLEN - ETH_HLEN) by RFC 802.3.
+ * Ipv4/tcp packets without at least 6 bytes of data will be padded.
+ * Packet sockets are protocol agnostic, and will not trim the padding.
+ */
+ if (pkt_size == ETH_ZLEN && iph->version == 4) {
+ data_len = ntohs(iph->tot_len)
+ - sizeof(struct tcphdr) - sizeof(struct iphdr);
+ }
+ vlog("%d ", data_len);
+ if (data_len != correct_payload[num_pkt]) {
+ vlog("[!=%d]", correct_payload[num_pkt]);
+ bad_packet = true;
+ }
+ num_pkt++;
+ }
+ vlog("}, Total %d packets.\n", num_pkt);
+ if (num_pkt != correct_num_pkts)
+ error(1, 0, "incorrect number of packets");
+ if (bad_packet)
+ error(1, 0, "incorrect packet geometry");
+
+ printf("Test succeeded\n\n");
+}
+
+static void gro_sender(void)
+{
+ static char fin_pkt[MAX_HDR_LEN];
+ struct sockaddr_ll daddr = {};
+ int txfd = -1;
+
+ txfd = socket(PF_PACKET, SOCK_RAW, IPPROTO_RAW);
+ if (txfd < 0)
+ error(1, errno, "socket creation");
+
+ memset(&daddr, 0, sizeof(daddr));
+ daddr.sll_ifindex = if_nametoindex(ifname);
+ if (daddr.sll_ifindex == 0)
+ error(1, errno, "if_nametoindex");
+ daddr.sll_family = AF_PACKET;
+ memcpy(daddr.sll_addr, dst_mac, ETH_ALEN);
+ daddr.sll_halen = ETH_ALEN;
+ create_packet(fin_pkt, PAYLOAD_LEN * 2, 0, 0, 1);
+
+ if (strcmp(testname, "data") == 0) {
+ send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN / 2);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_data_pkts(txfd, &daddr, PAYLOAD_LEN / 2, PAYLOAD_LEN);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "ack") == 0) {
+ send_ack(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "flags") == 0) {
+ send_flags(txfd, &daddr, 1, 0, 0, 0);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_flags(txfd, &daddr, 0, 1, 0, 0);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_flags(txfd, &daddr, 0, 0, 1, 0);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_flags(txfd, &daddr, 0, 0, 0, 1);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "tcp") == 0) {
+ send_changed_checksum(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_changed_seq(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_changed_ts(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_diff_opt(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "ip") == 0) {
+ send_changed_ECN(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_changed_tos(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ if (proto == PF_INET) {
+ /* Modified packets may be received out of order.
+ * Sleep function added to enforce test boundaries
+ * so that fin pkts are not received prior to other pkts.
+ */
+ sleep(1);
+ send_changed_ttl(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ sleep(1);
+ send_ip_options(txfd, &daddr);
+ sleep(1);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ sleep(1);
+ send_fragment4(txfd, &daddr);
+ sleep(1);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (proto == PF_INET6) {
+ send_fragment6(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ }
+ } else if (strcmp(testname, "large") == 0) {
+ /* 20 is the difference between min iphdr size
+ * and min ipv6hdr size. Like MAX_HDR_SIZE,
+ * MAX_PAYLOAD is defined with the larger header of the two.
+ */
+ int offset = proto == PF_INET ? 20 : 0;
+ int remainder = (MAX_PAYLOAD + offset) % MSS;
+
+ send_large(txfd, &daddr, remainder);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_large(txfd, &daddr, remainder + 1);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else {
+ error(1, 0, "Unknown testcase");
+ }
+
+ if (close(txfd))
+ error(1, errno, "socket close");
+}
+
+static void gro_receiver(void)
+{
+ static int correct_payload[NUM_PACKETS];
+ int rxfd = -1;
+
+ rxfd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_NONE));
+ if (rxfd < 0)
+ error(1, 0, "socket creation");
+ setup_sock_filter(rxfd);
+ set_timeout(rxfd);
+ bind_packetsocket(rxfd);
+
+ memset(correct_payload, 0, sizeof(correct_payload));
+
+ if (strcmp(testname, "data") == 0) {
+ printf("pure data packet of same size: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 1);
+
+ printf("large data packets followed by a smaller one: ");
+ correct_payload[0] = PAYLOAD_LEN * 1.5;
+ check_recv_pkts(rxfd, correct_payload, 1);
+
+ printf("small data packets followed by a larger one: ");
+ correct_payload[0] = PAYLOAD_LEN / 2;
+ correct_payload[1] = PAYLOAD_LEN;
+ check_recv_pkts(rxfd, correct_payload, 2);
+ } else if (strcmp(testname, "ack") == 0) {
+ printf("duplicate ack and pure ack: ");
+ check_recv_pkts(rxfd, correct_payload, 3);
+ } else if (strcmp(testname, "flags") == 0) {
+ correct_payload[0] = PAYLOAD_LEN * 3;
+ correct_payload[1] = PAYLOAD_LEN * 2;
+
+ printf("psh flag ends coalescing: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ correct_payload[1] = 0;
+ correct_payload[2] = PAYLOAD_LEN * 2;
+ printf("syn flag ends coalescing: ");
+ check_recv_pkts(rxfd, correct_payload, 3);
+
+ printf("rst flag ends coalescing: ");
+ check_recv_pkts(rxfd, correct_payload, 3);
+
+ printf("urg flag ends coalescing: ");
+ check_recv_pkts(rxfd, correct_payload, 3);
+ } else if (strcmp(testname, "tcp") == 0) {
+ correct_payload[0] = PAYLOAD_LEN;
+ correct_payload[1] = PAYLOAD_LEN;
+ correct_payload[2] = PAYLOAD_LEN;
+ correct_payload[3] = PAYLOAD_LEN;
+
+ printf("changed checksum does not coalesce: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ printf("Wrong Seq number doesn't coalesce: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ printf("Different timestamp doesn't coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 4);
+
+ printf("Different options doesn't coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 2);
+ } else if (strcmp(testname, "ip") == 0) {
+ correct_payload[0] = PAYLOAD_LEN;
+ correct_payload[1] = PAYLOAD_LEN;
+
+ printf("different ECN doesn't coalesce: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ printf("different tos doesn't coalesce: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ if (proto == PF_INET) {
+ printf("different ttl doesn't coalesce: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ printf("ip options doesn't coalesce: ");
+ correct_payload[2] = PAYLOAD_LEN;
+ check_recv_pkts(rxfd, correct_payload, 3);
+
+ printf("fragmented ip4 doesn't coalesce: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+ } else if (proto == PF_INET6) {
+ /* GRO doesn't check for ipv6 hop limit when flushing.
+ * Hence no corresponding test to the ipv4 case.
+ */
+ printf("fragmented ip6 doesn't coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 2);
+ }
+ } else if (strcmp(testname, "large") == 0) {
+ int offset = proto == PF_INET ? 20 : 0;
+ int remainder = (MAX_PAYLOAD + offset) % MSS;
+
+ correct_payload[0] = (MAX_PAYLOAD + offset);
+ correct_payload[1] = remainder;
+ printf("Shouldn't coalesce if exceed IP max pkt size: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ /* last segment sent individually, doesn't start new segment */
+ correct_payload[0] = correct_payload[0] - remainder;
+ correct_payload[1] = remainder + 1;
+ correct_payload[2] = remainder + 1;
+ check_recv_pkts(rxfd, correct_payload, 3);
+ } else {
+ error(1, 0, "Test case error, should never trigger");
+ }
+
+ if (close(rxfd))
+ error(1, 0, "socket close");
+}
+
+static void parse_args(int argc, char **argv)
+{
+ static const struct option opts[] = {
+ { "dmac", required_argument, NULL, 'D' },
+ { "iface", required_argument, NULL, 'i' },
+ { "ipv4", no_argument, NULL, '4' },
+ { "ipv6", no_argument, NULL, '6' },
+ { "rx", no_argument, NULL, 'r' },
+ { "smac", required_argument, NULL, 'S' },
+ { "test", required_argument, NULL, 't' },
+ { "verbose", no_argument, NULL, 'v' },
+ { 0, 0, 0, 0 }
+ };
+ int c;
+
+ while ((c = getopt_long(argc, argv, "46D:i:rS:t:v", opts, NULL)) != -1) {
+ switch (c) {
+ case '4':
+ proto = PF_INET;
+ ethhdr_proto = htons(ETH_P_IP);
+ break;
+ case '6':
+ proto = PF_INET6;
+ ethhdr_proto = htons(ETH_P_IPV6);
+ break;
+ case 'D':
+ dmac = optarg;
+ break;
+ case 'i':
+ ifname = optarg;
+ break;
+ case 'r':
+ tx_socket = false;
+ break;
+ case 'S':
+ smac = optarg;
+ break;
+ case 't':
+ testname = optarg;
+ break;
+ case 'v':
+ verbose = true;
+ break;
+ default:
+ error(1, 0, "%s invalid option %c\n", __func__, c);
+ break;
+ }
+ }
+}
+
+int main(int argc, char **argv)
+{
+ parse_args(argc, argv);
+
+ if (proto == PF_INET) {
+ tcp_offset = ETH_HLEN + sizeof(struct iphdr);
+ total_hdr_len = tcp_offset + sizeof(struct tcphdr);
+ } else if (proto == PF_INET6) {
+ tcp_offset = ETH_HLEN + sizeof(struct ipv6hdr);
+ total_hdr_len = MAX_HDR_LEN;
+ } else {
+ error(1, 0, "Protocol family is not ipv4 or ipv6");
+ }
+
+ read_MAC(src_mac, smac);
+ read_MAC(dst_mac, dmac);
+
+ if (tx_socket)
+ gro_sender();
+ else
+ gro_receiver();
+ return 0;
+}
diff --git a/tools/testing/selftests/net/gro.sh b/tools/testing/selftests/net/gro.sh
new file mode 100755
index 000000000000..342ad27f631b
--- /dev/null
+++ b/tools/testing/selftests/net/gro.sh
@@ -0,0 +1,99 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+readonly SERVER_MAC="aa:00:00:00:00:02"
+readonly CLIENT_MAC="aa:00:00:00:00:01"
+readonly TESTS=("data" "ack" "flags" "tcp" "ip" "large")
+readonly PROTOS=("ipv4" "ipv6")
+dev=""
+test="all"
+proto="ipv4"
+
+run_test() {
+ local server_pid=0
+ local exit_code=0
+ local protocol=$1
+ local test=$2
+ local ARGS=( "--${protocol}" "--dmac" "${SERVER_MAC}" \
+ "--smac" "${CLIENT_MAC}" "--test" "${test}" "--verbose" )
+
+ setup_ns
+ # Each test is run 3 times to deflake, because given the receive timing,
+ # not all packets that should coalesce will be considered in the same flow
+ # on every try.
+ for tries in {1..3}; do
+ # Actual test starts here
+ ip netns exec server_ns ./gro "${ARGS[@]}" "--rx" "--iface" "server" \
+ 1>>log.txt &
+ server_pid=$!
+ sleep 0.5 # to allow for socket init
+ ip netns exec client_ns ./gro "${ARGS[@]}" "--iface" "client" \
+ 1>>log.txt
+ wait "${server_pid}"
+ exit_code=$?
+ if [[ "${exit_code}" -eq 0 ]]; then
+ break;
+ fi
+ done
+ cleanup_ns
+ echo ${exit_code}
+}
+
+run_all_tests() {
+ local failed_tests=()
+ for proto in "${PROTOS[@]}"; do
+ for test in "${TESTS[@]}"; do
+ echo "running test ${proto} ${test}" >&2
+ exit_code=$(run_test $proto $test)
+ if [[ "${exit_code}" -ne 0 ]]; then
+ failed_tests+=("${proto}_${test}")
+ fi;
+ done;
+ done
+ if [[ ${#failed_tests[@]} -ne 0 ]]; then
+ echo "failed tests: ${failed_tests[*]}. \
+ Please see log.txt for more logs"
+ exit 1
+ else
+ echo "All Tests Succeeded!"
+ fi;
+}
+
+usage() {
+ echo "Usage: $0 \
+ [-i <DEV>] \
+ [-t data|ack|flags|tcp|ip|large] \
+ [-p <ipv4|ipv6>]" 1>&2;
+ exit 1;
+}
+
+while getopts "i:t:p:" opt; do
+ case "${opt}" in
+ i)
+ dev="${OPTARG}"
+ ;;
+ t)
+ test="${OPTARG}"
+ ;;
+ p)
+ proto="${OPTARG}"
+ ;;
+ *)
+ usage
+ ;;
+ esac
+done
+
+if [ -n "$dev" ]; then
+ source setup_loopback.sh
+else
+ source setup_veth.sh
+fi
+
+setup
+trap cleanup EXIT
+if [[ "${test}" == "all" ]]; then
+ run_all_tests
+else
+ run_test "${proto}" "${test}"
+fi;
diff --git a/tools/testing/selftests/net/ioam6.sh b/tools/testing/selftests/net/ioam6.sh
new file mode 100755
index 000000000000..3caf72bb9c6a
--- /dev/null
+++ b/tools/testing/selftests/net/ioam6.sh
@@ -0,0 +1,652 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Author: Justin Iurman <justin.iurman@uliege.be>
+#
+# This script evaluates the IOAM insertion for IPv6 by checking the IOAM data
+# consistency directly inside packets on the receiver side. Tests are divided
+# into three categories: OUTPUT (evaluates the IOAM processing by the sender),
+# INPUT (evaluates the IOAM processing by the receiver) and GLOBAL (evaluates
+# wider use cases that do not fall into the other two categories). Both OUTPUT
+# and INPUT tests only use a two-node topology (alpha and beta), while GLOBAL
+# tests use the entire three-node topology (alpha, beta, gamma). Each test is
+# documented inside its own handler in the code below.
+#
+# An IOAM domain is configured from Alpha to Gamma but not on the reverse path.
+# When either Beta or Gamma is the destination (depending on the test category),
+# Alpha adds an IOAM option (Pre-allocated Trace) inside a Hop-by-hop.
+#
+#
+# +-------------------+ +-------------------+
+# | | | |
+# | Alpha netns | | Gamma netns |
+# | | | |
+# | +-------------+ | | +-------------+ |
+# | | veth0 | | | | veth0 | |
+# | | db01::2/64 | | | | db02::2/64 | |
+# | +-------------+ | | +-------------+ |
+# | . | | . |
+# +-------------------+ +-------------------+
+# . .
+# . .
+# . .
+# +----------------------------------------------------+
+# | . . |
+# | +-------------+ +-------------+ |
+# | | veth0 | | veth1 | |
+# | | db01::1/64 | ................ | db02::1/64 | |
+# | +-------------+ +-------------+ |
+# | |
+# | Beta netns |
+# | |
+# +----------------------------------------------------+
+#
+#
+#
+# =============================================================
+# | Alpha - IOAM configuration |
+# +===========================================================+
+# | Node ID | 1 |
+# +-----------------------------------------------------------+
+# | Node Wide ID | 11111111 |
+# +-----------------------------------------------------------+
+# | Ingress ID | 0xffff (default value) |
+# +-----------------------------------------------------------+
+# | Ingress Wide ID | 0xffffffff (default value) |
+# +-----------------------------------------------------------+
+# | Egress ID | 101 |
+# +-----------------------------------------------------------+
+# | Egress Wide ID | 101101 |
+# +-----------------------------------------------------------+
+# | Namespace Data | 0xdeadbee0 |
+# +-----------------------------------------------------------+
+# | Namespace Wide Data | 0xcafec0caf00dc0de |
+# +-----------------------------------------------------------+
+# | Schema ID | 777 |
+# +-----------------------------------------------------------+
+# | Schema Data | something that will be 4n-aligned |
+# +-----------------------------------------------------------+
+#
+#
+# =============================================================
+# | Beta - IOAM configuration |
+# +===========================================================+
+# | Node ID | 2 |
+# +-----------------------------------------------------------+
+# | Node Wide ID | 22222222 |
+# +-----------------------------------------------------------+
+# | Ingress ID | 201 |
+# +-----------------------------------------------------------+
+# | Ingress Wide ID | 201201 |
+# +-----------------------------------------------------------+
+# | Egress ID | 202 |
+# +-----------------------------------------------------------+
+# | Egress Wide ID | 202202 |
+# +-----------------------------------------------------------+
+# | Namespace Data | 0xdeadbee1 |
+# +-----------------------------------------------------------+
+# | Namespace Wide Data | 0xcafec0caf11dc0de |
+# +-----------------------------------------------------------+
+# | Schema ID | 666 |
+# +-----------------------------------------------------------+
+# | Schema Data | Hello there -Obi |
+# +-----------------------------------------------------------+
+#
+#
+# =============================================================
+# | Gamma - IOAM configuration |
+# +===========================================================+
+# | Node ID | 3 |
+# +-----------------------------------------------------------+
+# | Node Wide ID | 33333333 |
+# +-----------------------------------------------------------+
+# | Ingress ID | 301 |
+# +-----------------------------------------------------------+
+# | Ingress Wide ID | 301301 |
+# +-----------------------------------------------------------+
+# | Egress ID | 0xffff (default value) |
+# +-----------------------------------------------------------+
+# | Egress Wide ID | 0xffffffff (default value) |
+# +-----------------------------------------------------------+
+# | Namespace Data | 0xdeadbee2 |
+# +-----------------------------------------------------------+
+# | Namespace Wide Data | 0xcafec0caf22dc0de |
+# +-----------------------------------------------------------+
+# | Schema ID | 0xffffff (= None) |
+# +-----------------------------------------------------------+
+# | Schema Data | |
+# +-----------------------------------------------------------+
+
+
+################################################################################
+# #
+# WARNING: Be careful if you modify the block below - it MUST be kept #
+# synchronized with configurations inside ioam6_parser.c and always #
+# reflect the same. #
+# #
+################################################################################
+
+ALPHA=(
+ 1 # ID
+ 11111111 # Wide ID
+ 0xffff # Ingress ID
+ 0xffffffff # Ingress Wide ID
+ 101 # Egress ID
+ 101101 # Egress Wide ID
+ 0xdeadbee0 # Namespace Data
+ 0xcafec0caf00dc0de # Namespace Wide Data
+ 777 # Schema ID (0xffffff = None)
+ "something that will be 4n-aligned" # Schema Data
+)
+
+BETA=(
+ 2
+ 22222222
+ 201
+ 201201
+ 202
+ 202202
+ 0xdeadbee1
+ 0xcafec0caf11dc0de
+ 666
+ "Hello there -Obi"
+)
+
+GAMMA=(
+ 3
+ 33333333
+ 301
+ 301301
+ 0xffff
+ 0xffffffff
+ 0xdeadbee2
+ 0xcafec0caf22dc0de
+ 0xffffff
+ ""
+)
+
+TESTS_OUTPUT="
+ out_undef_ns
+ out_no_room
+ out_bits
+ out_full_supp_trace
+"
+
+TESTS_INPUT="
+ in_undef_ns
+ in_no_room
+ in_oflag
+ in_bits
+ in_full_supp_trace
+"
+
+TESTS_GLOBAL="
+ fwd_full_supp_trace
+"
+
+
+################################################################################
+# #
+# LIBRARY #
+# #
+################################################################################
+
+check_kernel_compatibility()
+{
+ ip netns add ioam-tmp-node
+ ip link add name veth0 netns ioam-tmp-node type veth \
+ peer name veth1 netns ioam-tmp-node
+
+ ip -netns ioam-tmp-node link set veth0 up
+ ip -netns ioam-tmp-node link set veth1 up
+
+ ip -netns ioam-tmp-node ioam namespace add 0 &>/dev/null
+ ns_ad=$?
+
+ ip -netns ioam-tmp-node ioam namespace show | grep -q "namespace 0"
+ ns_sh=$?
+
+ if [[ $ns_ad != 0 || $ns_sh != 0 ]]
+ then
+ echo "SKIP: kernel version probably too old, missing ioam support"
+ ip link del veth0 2>/dev/null || true
+ ip netns del ioam-tmp-node || true
+ exit 1
+ fi
+
+ ip -netns ioam-tmp-node route add db02::/64 encap ioam6 trace prealloc \
+ type 0x800000 ns 0 size 4 dev veth0 &>/dev/null
+ tr_ad=$?
+
+ ip -netns ioam-tmp-node -6 route | grep -q "encap ioam6 trace"
+ tr_sh=$?
+
+ if [[ $tr_ad != 0 || $tr_sh != 0 ]]
+ then
+ echo "SKIP: cannot attach an ioam trace to a route, did you compile" \
+ "without CONFIG_IPV6_IOAM6_LWTUNNEL?"
+ ip link del veth0 2>/dev/null || true
+ ip netns del ioam-tmp-node || true
+ exit 1
+ fi
+
+ ip link del veth0 2>/dev/null || true
+ ip netns del ioam-tmp-node || true
+}
+
+cleanup()
+{
+ ip link del ioam-veth-alpha 2>/dev/null || true
+ ip link del ioam-veth-gamma 2>/dev/null || true
+
+ ip netns del ioam-node-alpha || true
+ ip netns del ioam-node-beta || true
+ ip netns del ioam-node-gamma || true
+}
+
+setup()
+{
+ ip netns add ioam-node-alpha
+ ip netns add ioam-node-beta
+ ip netns add ioam-node-gamma
+
+ ip link add name ioam-veth-alpha netns ioam-node-alpha type veth \
+ peer name ioam-veth-betaL netns ioam-node-beta
+ ip link add name ioam-veth-betaR netns ioam-node-beta type veth \
+ peer name ioam-veth-gamma netns ioam-node-gamma
+
+ ip -netns ioam-node-alpha link set ioam-veth-alpha name veth0
+ ip -netns ioam-node-beta link set ioam-veth-betaL name veth0
+ ip -netns ioam-node-beta link set ioam-veth-betaR name veth1
+ ip -netns ioam-node-gamma link set ioam-veth-gamma name veth0
+
+ ip -netns ioam-node-alpha addr add db01::2/64 dev veth0
+ ip -netns ioam-node-alpha link set veth0 up
+ ip -netns ioam-node-alpha link set lo up
+ ip -netns ioam-node-alpha route add db02::/64 via db01::1 dev veth0
+ ip -netns ioam-node-alpha route del db01::/64
+ ip -netns ioam-node-alpha route add db01::/64 dev veth0
+
+ ip -netns ioam-node-beta addr add db01::1/64 dev veth0
+ ip -netns ioam-node-beta addr add db02::1/64 dev veth1
+ ip -netns ioam-node-beta link set veth0 up
+ ip -netns ioam-node-beta link set veth1 up
+ ip -netns ioam-node-beta link set lo up
+
+ ip -netns ioam-node-gamma addr add db02::2/64 dev veth0
+ ip -netns ioam-node-gamma link set veth0 up
+ ip -netns ioam-node-gamma link set lo up
+ ip -netns ioam-node-gamma route add db01::/64 via db02::1 dev veth0
+
+ # - IOAM config -
+ ip netns exec ioam-node-alpha sysctl -wq net.ipv6.ioam6_id=${ALPHA[0]}
+ ip netns exec ioam-node-alpha sysctl -wq net.ipv6.ioam6_id_wide=${ALPHA[1]}
+ ip netns exec ioam-node-alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id=${ALPHA[4]}
+ ip netns exec ioam-node-alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${ALPHA[5]}
+ ip -netns ioam-node-alpha ioam namespace add 123 data ${ALPHA[6]} wide ${ALPHA[7]}
+ ip -netns ioam-node-alpha ioam schema add ${ALPHA[8]} "${ALPHA[9]}"
+ ip -netns ioam-node-alpha ioam namespace set 123 schema ${ALPHA[8]}
+
+ ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.all.forwarding=1
+ ip netns exec ioam-node-beta sysctl -wq net.ipv6.ioam6_id=${BETA[0]}
+ ip netns exec ioam-node-beta sysctl -wq net.ipv6.ioam6_id_wide=${BETA[1]}
+ ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1
+ ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_id=${BETA[2]}
+ ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${BETA[3]}
+ ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth1.ioam6_id=${BETA[4]}
+ ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth1.ioam6_id_wide=${BETA[5]}
+ ip -netns ioam-node-beta ioam namespace add 123 data ${BETA[6]} wide ${BETA[7]}
+ ip -netns ioam-node-beta ioam schema add ${BETA[8]} "${BETA[9]}"
+ ip -netns ioam-node-beta ioam namespace set 123 schema ${BETA[8]}
+
+ ip netns exec ioam-node-gamma sysctl -wq net.ipv6.ioam6_id=${GAMMA[0]}
+ ip netns exec ioam-node-gamma sysctl -wq net.ipv6.ioam6_id_wide=${GAMMA[1]}
+ ip netns exec ioam-node-gamma sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1
+ ip netns exec ioam-node-gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id=${GAMMA[2]}
+ ip netns exec ioam-node-gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${GAMMA[3]}
+ ip -netns ioam-node-gamma ioam namespace add 123 data ${GAMMA[6]} wide ${GAMMA[7]}
+
+ sleep 1
+
+ ip netns exec ioam-node-alpha ping6 -c 5 -W 1 db02::2 &>/dev/null
+ if [ $? != 0 ]
+ then
+ echo "Setup FAILED"
+ cleanup &>/dev/null
+ exit 0
+ fi
+}
+
+log_test_passed()
+{
+ local desc=$1
+ printf "TEST: %-60s [ OK ]\n" "${desc}"
+}
+
+log_test_failed()
+{
+ local desc=$1
+ printf "TEST: %-60s [FAIL]\n" "${desc}"
+}
+
+run_test()
+{
+ local name=$1
+ local desc=$2
+ local node_src=$3
+ local node_dst=$4
+ local ip6_src=$5
+ local ip6_dst=$6
+ local if_dst=$7
+ local trace_type=$8
+ local ioam_ns=$9
+
+ ip netns exec $node_dst ./ioam6_parser $if_dst $name $ip6_src $ip6_dst \
+ $trace_type $ioam_ns &
+ local spid=$!
+ sleep 0.1
+
+ ip netns exec $node_src ping6 -t 64 -c 1 -W 1 $ip6_dst &>/dev/null
+ if [ $? != 0 ]
+ then
+ log_test_failed "${desc}"
+ kill -2 $spid &>/dev/null
+ else
+ wait $spid
+ [ $? = 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}"
+ fi
+}
+
+run()
+{
+ echo
+ echo "OUTPUT tests"
+ printf "%0.s-" {1..74}
+ echo
+
+ # set OUTPUT settings
+ ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=0
+
+ for t in $TESTS_OUTPUT
+ do
+ $t
+ done
+
+ # clean OUTPUT settings
+ ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1
+ ip -netns ioam-node-alpha route change db01::/64 dev veth0
+
+
+ echo
+ echo "INPUT tests"
+ printf "%0.s-" {1..74}
+ echo
+
+ # set INPUT settings
+ ip -netns ioam-node-alpha ioam namespace del 123
+
+ for t in $TESTS_INPUT
+ do
+ $t
+ done
+
+ # clean INPUT settings
+ ip -netns ioam-node-alpha ioam namespace add 123 \
+ data ${ALPHA[6]} wide ${ALPHA[7]}
+ ip -netns ioam-node-alpha ioam namespace set 123 schema ${ALPHA[8]}
+ ip -netns ioam-node-alpha route change db01::/64 dev veth0
+
+
+ echo
+ echo "GLOBAL tests"
+ printf "%0.s-" {1..74}
+ echo
+
+ for t in $TESTS_GLOBAL
+ do
+ $t
+ done
+}
+
+bit2type=(
+ 0x800000 0x400000 0x200000 0x100000 0x080000 0x040000 0x020000 0x010000
+ 0x008000 0x004000 0x002000 0x001000 0x000800 0x000400 0x000200 0x000100
+ 0x000080 0x000040 0x000020 0x000010 0x000008 0x000004 0x000002
+)
+bit2size=( 4 4 4 4 4 4 4 4 8 8 8 4 4 4 4 4 4 4 4 4 4 4 4 )
+
+
+################################################################################
+# #
+# OUTPUT tests #
+# #
+# Two nodes (sender/receiver), IOAM disabled on ingress for the receiver. #
+################################################################################
+
+out_undef_ns()
+{
+ ##############################################################################
+ # Make sure that the encap node won't fill the trace if the chosen IOAM #
+ # namespace is not configured locally. #
+ ##############################################################################
+ local desc="Unknown IOAM namespace"
+
+ ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
+ type 0x800000 ns 0 size 4 dev veth0
+
+ run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
+ db01::1 veth0 0x800000 0
+}
+
+out_no_room()
+{
+ ##############################################################################
+ # Make sure that the encap node won't fill the trace and will set the #
+ # Overflow flag since there is no room enough for its data. #
+ ##############################################################################
+ local desc="Missing trace room"
+
+ ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
+ type 0xc00000 ns 123 size 4 dev veth0
+
+ run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
+ db01::1 veth0 0xc00000 123
+}
+
+out_bits()
+{
+ ##############################################################################
+ # Make sure that, for each trace type bit, the encap node will either: #
+ # (i) fill the trace with its data when it is a supported bit #
+ # (ii) not fill the trace with its data when it is an unsupported bit #
+ ##############################################################################
+ local desc="Trace type with bit <n> only"
+
+ local tmp=${bit2size[22]}
+ bit2size[22]=$(( $tmp + ${#ALPHA[9]} + ((4 - (${#ALPHA[9]} % 4)) % 4) ))
+
+ for i in {0..22}
+ do
+ ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace \
+ prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} dev veth0
+
+ run_test "out_bit$i" "${desc/<n>/$i}" ioam-node-alpha ioam-node-beta \
+ db01::2 db01::1 veth0 ${bit2type[$i]} 123
+ done
+
+ bit2size[22]=$tmp
+}
+
+out_full_supp_trace()
+{
+ ##############################################################################
+ # Make sure that the encap node will correctly fill a full trace. Be careful,#
+ # "full trace" here does NOT mean all bits (only supported ones). #
+ ##############################################################################
+ local desc="Full supported trace"
+
+ ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
+ type 0xfff002 ns 123 size 100 dev veth0
+
+ run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
+ db01::1 veth0 0xfff002 123
+}
+
+
+################################################################################
+# #
+# INPUT tests #
+# #
+# Two nodes (sender/receiver), the sender MUST NOT fill the trace upon #
+# insertion -> the IOAM namespace configured on the sender is removed #
+# and is used in the inserted trace to force the sender not to fill it. #
+################################################################################
+
+in_undef_ns()
+{
+ ##############################################################################
+ # Make sure that the receiving node won't fill the trace if the related IOAM #
+ # namespace is not configured locally. #
+ ##############################################################################
+ local desc="Unknown IOAM namespace"
+
+ ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
+ type 0x800000 ns 0 size 4 dev veth0
+
+ run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
+ db01::1 veth0 0x800000 0
+}
+
+in_no_room()
+{
+ ##############################################################################
+ # Make sure that the receiving node won't fill the trace and will set the #
+ # Overflow flag if there is no room enough for its data. #
+ ##############################################################################
+ local desc="Missing trace room"
+
+ ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
+ type 0xc00000 ns 123 size 4 dev veth0
+
+ run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
+ db01::1 veth0 0xc00000 123
+}
+
+in_bits()
+{
+ ##############################################################################
+ # Make sure that, for each trace type bit, the receiving node will either: #
+ # (i) fill the trace with its data when it is a supported bit #
+ # (ii) not fill the trace with its data when it is an unsupported bit #
+ ##############################################################################
+ local desc="Trace type with bit <n> only"
+
+ local tmp=${bit2size[22]}
+ bit2size[22]=$(( $tmp + ${#BETA[9]} + ((4 - (${#BETA[9]} % 4)) % 4) ))
+
+ for i in {0..22}
+ do
+ ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace \
+ prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} dev veth0
+
+ run_test "in_bit$i" "${desc/<n>/$i}" ioam-node-alpha ioam-node-beta \
+ db01::2 db01::1 veth0 ${bit2type[$i]} 123
+ done
+
+ bit2size[22]=$tmp
+}
+
+in_oflag()
+{
+ ##############################################################################
+ # Make sure that the receiving node won't fill the trace since the Overflow #
+ # flag is set. #
+ ##############################################################################
+ local desc="Overflow flag is set"
+
+ # Exception:
+ # Here, we need the sender to set the Overflow flag. For that, we will add
+ # back the IOAM namespace that was previously configured on the sender.
+ ip -netns ioam-node-alpha ioam namespace add 123
+
+ ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
+ type 0xc00000 ns 123 size 4 dev veth0
+
+ run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
+ db01::1 veth0 0xc00000 123
+
+ # And we clean the exception for this test to get things back to normal for
+ # other INPUT tests
+ ip -netns ioam-node-alpha ioam namespace del 123
+}
+
+in_full_supp_trace()
+{
+ ##############################################################################
+ # Make sure that the receiving node will correctly fill a full trace. Be #
+ # careful, "full trace" here does NOT mean all bits (only supported ones). #
+ ##############################################################################
+ local desc="Full supported trace"
+
+ ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
+ type 0xfff002 ns 123 size 80 dev veth0
+
+ run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
+ db01::1 veth0 0xfff002 123
+}
+
+
+################################################################################
+# #
+# GLOBAL tests #
+# #
+# Three nodes (sender/router/receiver), IOAM fully enabled on every node. #
+################################################################################
+
+fwd_full_supp_trace()
+{
+ ##############################################################################
+ # Make sure that all three nodes correctly filled the full supported trace #
+ # by checking that the trace data is consistent with the predefined config. #
+ ##############################################################################
+ local desc="Forward - Full supported trace"
+
+ ip -netns ioam-node-alpha route change db02::/64 encap ioam6 trace prealloc \
+ type 0xfff002 ns 123 size 244 via db01::1 dev veth0
+
+ run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-gamma db01::2 \
+ db02::2 veth0 0xfff002 123
+}
+
+
+################################################################################
+# #
+# MAIN #
+# #
+################################################################################
+
+if [ "$(id -u)" -ne 0 ]
+then
+ echo "SKIP: Need root privileges"
+ exit 1
+fi
+
+if [ ! -x "$(command -v ip)" ]
+then
+ echo "SKIP: Could not run test without ip tool"
+ exit 1
+fi
+
+ip ioam &>/dev/null
+if [ $? = 1 ]
+then
+ echo "SKIP: iproute2 too old, missing ioam command"
+ exit 1
+fi
+
+check_kernel_compatibility
+
+cleanup &>/dev/null
+setup
+run
+cleanup &>/dev/null
diff --git a/tools/testing/selftests/net/ioam6_parser.c b/tools/testing/selftests/net/ioam6_parser.c
new file mode 100644
index 000000000000..d376cb2c383c
--- /dev/null
+++ b/tools/testing/selftests/net/ioam6_parser.c
@@ -0,0 +1,720 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Author: Justin Iurman (justin.iurman@uliege.be)
+ *
+ * IOAM tester for IPv6, see ioam6.sh for details on each test case.
+ */
+#include <arpa/inet.h>
+#include <errno.h>
+#include <limits.h>
+#include <linux/const.h>
+#include <linux/if_ether.h>
+#include <linux/ioam6.h>
+#include <linux/ipv6.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+struct ioam_config {
+ __u32 id;
+ __u64 wide;
+ __u16 ingr_id;
+ __u16 egr_id;
+ __u32 ingr_wide;
+ __u32 egr_wide;
+ __u32 ns_data;
+ __u64 ns_wide;
+ __u32 sc_id;
+ __u8 hlim;
+ char *sc_data;
+};
+
+/*
+ * Be careful if you modify structs below - everything MUST be kept synchronized
+ * with configurations inside ioam6.sh and always reflect the same.
+ */
+
+static struct ioam_config node1 = {
+ .id = 1,
+ .wide = 11111111,
+ .ingr_id = 0xffff, /* default value */
+ .egr_id = 101,
+ .ingr_wide = 0xffffffff, /* default value */
+ .egr_wide = 101101,
+ .ns_data = 0xdeadbee0,
+ .ns_wide = 0xcafec0caf00dc0de,
+ .sc_id = 777,
+ .sc_data = "something that will be 4n-aligned",
+ .hlim = 64,
+};
+
+static struct ioam_config node2 = {
+ .id = 2,
+ .wide = 22222222,
+ .ingr_id = 201,
+ .egr_id = 202,
+ .ingr_wide = 201201,
+ .egr_wide = 202202,
+ .ns_data = 0xdeadbee1,
+ .ns_wide = 0xcafec0caf11dc0de,
+ .sc_id = 666,
+ .sc_data = "Hello there -Obi",
+ .hlim = 63,
+};
+
+static struct ioam_config node3 = {
+ .id = 3,
+ .wide = 33333333,
+ .ingr_id = 301,
+ .egr_id = 0xffff, /* default value */
+ .ingr_wide = 301301,
+ .egr_wide = 0xffffffff, /* default value */
+ .ns_data = 0xdeadbee2,
+ .ns_wide = 0xcafec0caf22dc0de,
+ .sc_id = 0xffffff, /* default value */
+ .sc_data = NULL,
+ .hlim = 62,
+};
+
+enum {
+ /**********
+ * OUTPUT *
+ **********/
+ TEST_OUT_UNDEF_NS,
+ TEST_OUT_NO_ROOM,
+ TEST_OUT_BIT0,
+ TEST_OUT_BIT1,
+ TEST_OUT_BIT2,
+ TEST_OUT_BIT3,
+ TEST_OUT_BIT4,
+ TEST_OUT_BIT5,
+ TEST_OUT_BIT6,
+ TEST_OUT_BIT7,
+ TEST_OUT_BIT8,
+ TEST_OUT_BIT9,
+ TEST_OUT_BIT10,
+ TEST_OUT_BIT11,
+ TEST_OUT_BIT12,
+ TEST_OUT_BIT13,
+ TEST_OUT_BIT14,
+ TEST_OUT_BIT15,
+ TEST_OUT_BIT16,
+ TEST_OUT_BIT17,
+ TEST_OUT_BIT18,
+ TEST_OUT_BIT19,
+ TEST_OUT_BIT20,
+ TEST_OUT_BIT21,
+ TEST_OUT_BIT22,
+ TEST_OUT_FULL_SUPP_TRACE,
+
+ /*********
+ * INPUT *
+ *********/
+ TEST_IN_UNDEF_NS,
+ TEST_IN_NO_ROOM,
+ TEST_IN_OFLAG,
+ TEST_IN_BIT0,
+ TEST_IN_BIT1,
+ TEST_IN_BIT2,
+ TEST_IN_BIT3,
+ TEST_IN_BIT4,
+ TEST_IN_BIT5,
+ TEST_IN_BIT6,
+ TEST_IN_BIT7,
+ TEST_IN_BIT8,
+ TEST_IN_BIT9,
+ TEST_IN_BIT10,
+ TEST_IN_BIT11,
+ TEST_IN_BIT12,
+ TEST_IN_BIT13,
+ TEST_IN_BIT14,
+ TEST_IN_BIT15,
+ TEST_IN_BIT16,
+ TEST_IN_BIT17,
+ TEST_IN_BIT18,
+ TEST_IN_BIT19,
+ TEST_IN_BIT20,
+ TEST_IN_BIT21,
+ TEST_IN_BIT22,
+ TEST_IN_FULL_SUPP_TRACE,
+
+ /**********
+ * GLOBAL *
+ **********/
+ TEST_FWD_FULL_SUPP_TRACE,
+
+ __TEST_MAX,
+};
+
+static int check_ioam_header(int tid, struct ioam6_trace_hdr *ioam6h,
+ __u32 trace_type, __u16 ioam_ns)
+{
+ if (__be16_to_cpu(ioam6h->namespace_id) != ioam_ns ||
+ __be32_to_cpu(ioam6h->type_be32) != (trace_type << 8))
+ return 1;
+
+ switch (tid) {
+ case TEST_OUT_UNDEF_NS:
+ case TEST_IN_UNDEF_NS:
+ return ioam6h->overflow ||
+ ioam6h->nodelen != 1 ||
+ ioam6h->remlen != 1;
+
+ case TEST_OUT_NO_ROOM:
+ case TEST_IN_NO_ROOM:
+ case TEST_IN_OFLAG:
+ return !ioam6h->overflow ||
+ ioam6h->nodelen != 2 ||
+ ioam6h->remlen != 1;
+
+ case TEST_OUT_BIT0:
+ case TEST_IN_BIT0:
+ case TEST_OUT_BIT1:
+ case TEST_IN_BIT1:
+ case TEST_OUT_BIT2:
+ case TEST_IN_BIT2:
+ case TEST_OUT_BIT3:
+ case TEST_IN_BIT3:
+ case TEST_OUT_BIT4:
+ case TEST_IN_BIT4:
+ case TEST_OUT_BIT5:
+ case TEST_IN_BIT5:
+ case TEST_OUT_BIT6:
+ case TEST_IN_BIT6:
+ case TEST_OUT_BIT7:
+ case TEST_IN_BIT7:
+ case TEST_OUT_BIT11:
+ case TEST_IN_BIT11:
+ return ioam6h->overflow ||
+ ioam6h->nodelen != 1 ||
+ ioam6h->remlen;
+
+ case TEST_OUT_BIT8:
+ case TEST_IN_BIT8:
+ case TEST_OUT_BIT9:
+ case TEST_IN_BIT9:
+ case TEST_OUT_BIT10:
+ case TEST_IN_BIT10:
+ return ioam6h->overflow ||
+ ioam6h->nodelen != 2 ||
+ ioam6h->remlen;
+
+ case TEST_OUT_BIT12:
+ case TEST_IN_BIT12:
+ case TEST_OUT_BIT13:
+ case TEST_IN_BIT13:
+ case TEST_OUT_BIT14:
+ case TEST_IN_BIT14:
+ case TEST_OUT_BIT15:
+ case TEST_IN_BIT15:
+ case TEST_OUT_BIT16:
+ case TEST_IN_BIT16:
+ case TEST_OUT_BIT17:
+ case TEST_IN_BIT17:
+ case TEST_OUT_BIT18:
+ case TEST_IN_BIT18:
+ case TEST_OUT_BIT19:
+ case TEST_IN_BIT19:
+ case TEST_OUT_BIT20:
+ case TEST_IN_BIT20:
+ case TEST_OUT_BIT21:
+ case TEST_IN_BIT21:
+ return ioam6h->overflow ||
+ ioam6h->nodelen ||
+ ioam6h->remlen != 1;
+
+ case TEST_OUT_BIT22:
+ case TEST_IN_BIT22:
+ return ioam6h->overflow ||
+ ioam6h->nodelen ||
+ ioam6h->remlen;
+
+ case TEST_OUT_FULL_SUPP_TRACE:
+ case TEST_IN_FULL_SUPP_TRACE:
+ case TEST_FWD_FULL_SUPP_TRACE:
+ return ioam6h->overflow ||
+ ioam6h->nodelen != 15 ||
+ ioam6h->remlen;
+
+ default:
+ break;
+ }
+
+ return 1;
+}
+
+static int check_ioam6_data(__u8 **p, struct ioam6_trace_hdr *ioam6h,
+ const struct ioam_config cnf)
+{
+ unsigned int len;
+ __u8 aligned;
+ __u64 raw64;
+ __u32 raw32;
+
+ if (ioam6h->type.bit0) {
+ raw32 = __be32_to_cpu(*((__u32 *)*p));
+ if (cnf.hlim != (raw32 >> 24) || cnf.id != (raw32 & 0xffffff))
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit1) {
+ raw32 = __be32_to_cpu(*((__u32 *)*p));
+ if (cnf.ingr_id != (raw32 >> 16) ||
+ cnf.egr_id != (raw32 & 0xffff))
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit2)
+ *p += sizeof(__u32);
+
+ if (ioam6h->type.bit3)
+ *p += sizeof(__u32);
+
+ if (ioam6h->type.bit4) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit5) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != cnf.ns_data)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit6) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit7) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit8) {
+ raw64 = __be64_to_cpu(*((__u64 *)*p));
+ if (cnf.hlim != (raw64 >> 56) ||
+ cnf.wide != (raw64 & 0xffffffffffffff))
+ return 1;
+ *p += sizeof(__u64);
+ }
+
+ if (ioam6h->type.bit9) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != cnf.ingr_wide)
+ return 1;
+ *p += sizeof(__u32);
+
+ if (__be32_to_cpu(*((__u32 *)*p)) != cnf.egr_wide)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit10) {
+ if (__be64_to_cpu(*((__u64 *)*p)) != cnf.ns_wide)
+ return 1;
+ *p += sizeof(__u64);
+ }
+
+ if (ioam6h->type.bit11) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit22) {
+ len = cnf.sc_data ? strlen(cnf.sc_data) : 0;
+ aligned = cnf.sc_data ? __ALIGN_KERNEL(len, 4) : 0;
+
+ raw32 = __be32_to_cpu(*((__u32 *)*p));
+ if (aligned != (raw32 >> 24) * 4 ||
+ cnf.sc_id != (raw32 & 0xffffff))
+ return 1;
+ *p += sizeof(__u32);
+
+ if (cnf.sc_data) {
+ if (strncmp((char *)*p, cnf.sc_data, len))
+ return 1;
+
+ *p += len;
+ aligned -= len;
+
+ while (aligned--) {
+ if (**p != '\0')
+ return 1;
+ *p += sizeof(__u8);
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int check_ioam_header_and_data(int tid, struct ioam6_trace_hdr *ioam6h,
+ __u32 trace_type, __u16 ioam_ns)
+{
+ __u8 *p;
+
+ if (check_ioam_header(tid, ioam6h, trace_type, ioam_ns))
+ return 1;
+
+ p = ioam6h->data + ioam6h->remlen * 4;
+
+ switch (tid) {
+ case TEST_OUT_BIT0:
+ case TEST_OUT_BIT1:
+ case TEST_OUT_BIT2:
+ case TEST_OUT_BIT3:
+ case TEST_OUT_BIT4:
+ case TEST_OUT_BIT5:
+ case TEST_OUT_BIT6:
+ case TEST_OUT_BIT7:
+ case TEST_OUT_BIT8:
+ case TEST_OUT_BIT9:
+ case TEST_OUT_BIT10:
+ case TEST_OUT_BIT11:
+ case TEST_OUT_BIT22:
+ case TEST_OUT_FULL_SUPP_TRACE:
+ return check_ioam6_data(&p, ioam6h, node1);
+
+ case TEST_IN_BIT0:
+ case TEST_IN_BIT1:
+ case TEST_IN_BIT2:
+ case TEST_IN_BIT3:
+ case TEST_IN_BIT4:
+ case TEST_IN_BIT5:
+ case TEST_IN_BIT6:
+ case TEST_IN_BIT7:
+ case TEST_IN_BIT8:
+ case TEST_IN_BIT9:
+ case TEST_IN_BIT10:
+ case TEST_IN_BIT11:
+ case TEST_IN_BIT22:
+ case TEST_IN_FULL_SUPP_TRACE:
+ {
+ __u32 tmp32 = node2.egr_wide;
+ __u16 tmp16 = node2.egr_id;
+ int res;
+
+ node2.egr_id = 0xffff;
+ node2.egr_wide = 0xffffffff;
+
+ res = check_ioam6_data(&p, ioam6h, node2);
+
+ node2.egr_id = tmp16;
+ node2.egr_wide = tmp32;
+
+ return res;
+ }
+
+ case TEST_FWD_FULL_SUPP_TRACE:
+ if (check_ioam6_data(&p, ioam6h, node3))
+ return 1;
+ if (check_ioam6_data(&p, ioam6h, node2))
+ return 1;
+ return check_ioam6_data(&p, ioam6h, node1);
+
+ default:
+ break;
+ }
+
+ return 1;
+}
+
+static int str2id(const char *tname)
+{
+ if (!strcmp("out_undef_ns", tname))
+ return TEST_OUT_UNDEF_NS;
+ if (!strcmp("out_no_room", tname))
+ return TEST_OUT_NO_ROOM;
+ if (!strcmp("out_bit0", tname))
+ return TEST_OUT_BIT0;
+ if (!strcmp("out_bit1", tname))
+ return TEST_OUT_BIT1;
+ if (!strcmp("out_bit2", tname))
+ return TEST_OUT_BIT2;
+ if (!strcmp("out_bit3", tname))
+ return TEST_OUT_BIT3;
+ if (!strcmp("out_bit4", tname))
+ return TEST_OUT_BIT4;
+ if (!strcmp("out_bit5", tname))
+ return TEST_OUT_BIT5;
+ if (!strcmp("out_bit6", tname))
+ return TEST_OUT_BIT6;
+ if (!strcmp("out_bit7", tname))
+ return TEST_OUT_BIT7;
+ if (!strcmp("out_bit8", tname))
+ return TEST_OUT_BIT8;
+ if (!strcmp("out_bit9", tname))
+ return TEST_OUT_BIT9;
+ if (!strcmp("out_bit10", tname))
+ return TEST_OUT_BIT10;
+ if (!strcmp("out_bit11", tname))
+ return TEST_OUT_BIT11;
+ if (!strcmp("out_bit12", tname))
+ return TEST_OUT_BIT12;
+ if (!strcmp("out_bit13", tname))
+ return TEST_OUT_BIT13;
+ if (!strcmp("out_bit14", tname))
+ return TEST_OUT_BIT14;
+ if (!strcmp("out_bit15", tname))
+ return TEST_OUT_BIT15;
+ if (!strcmp("out_bit16", tname))
+ return TEST_OUT_BIT16;
+ if (!strcmp("out_bit17", tname))
+ return TEST_OUT_BIT17;
+ if (!strcmp("out_bit18", tname))
+ return TEST_OUT_BIT18;
+ if (!strcmp("out_bit19", tname))
+ return TEST_OUT_BIT19;
+ if (!strcmp("out_bit20", tname))
+ return TEST_OUT_BIT20;
+ if (!strcmp("out_bit21", tname))
+ return TEST_OUT_BIT21;
+ if (!strcmp("out_bit22", tname))
+ return TEST_OUT_BIT22;
+ if (!strcmp("out_full_supp_trace", tname))
+ return TEST_OUT_FULL_SUPP_TRACE;
+ if (!strcmp("in_undef_ns", tname))
+ return TEST_IN_UNDEF_NS;
+ if (!strcmp("in_no_room", tname))
+ return TEST_IN_NO_ROOM;
+ if (!strcmp("in_oflag", tname))
+ return TEST_IN_OFLAG;
+ if (!strcmp("in_bit0", tname))
+ return TEST_IN_BIT0;
+ if (!strcmp("in_bit1", tname))
+ return TEST_IN_BIT1;
+ if (!strcmp("in_bit2", tname))
+ return TEST_IN_BIT2;
+ if (!strcmp("in_bit3", tname))
+ return TEST_IN_BIT3;
+ if (!strcmp("in_bit4", tname))
+ return TEST_IN_BIT4;
+ if (!strcmp("in_bit5", tname))
+ return TEST_IN_BIT5;
+ if (!strcmp("in_bit6", tname))
+ return TEST_IN_BIT6;
+ if (!strcmp("in_bit7", tname))
+ return TEST_IN_BIT7;
+ if (!strcmp("in_bit8", tname))
+ return TEST_IN_BIT8;
+ if (!strcmp("in_bit9", tname))
+ return TEST_IN_BIT9;
+ if (!strcmp("in_bit10", tname))
+ return TEST_IN_BIT10;
+ if (!strcmp("in_bit11", tname))
+ return TEST_IN_BIT11;
+ if (!strcmp("in_bit12", tname))
+ return TEST_IN_BIT12;
+ if (!strcmp("in_bit13", tname))
+ return TEST_IN_BIT13;
+ if (!strcmp("in_bit14", tname))
+ return TEST_IN_BIT14;
+ if (!strcmp("in_bit15", tname))
+ return TEST_IN_BIT15;
+ if (!strcmp("in_bit16", tname))
+ return TEST_IN_BIT16;
+ if (!strcmp("in_bit17", tname))
+ return TEST_IN_BIT17;
+ if (!strcmp("in_bit18", tname))
+ return TEST_IN_BIT18;
+ if (!strcmp("in_bit19", tname))
+ return TEST_IN_BIT19;
+ if (!strcmp("in_bit20", tname))
+ return TEST_IN_BIT20;
+ if (!strcmp("in_bit21", tname))
+ return TEST_IN_BIT21;
+ if (!strcmp("in_bit22", tname))
+ return TEST_IN_BIT22;
+ if (!strcmp("in_full_supp_trace", tname))
+ return TEST_IN_FULL_SUPP_TRACE;
+ if (!strcmp("fwd_full_supp_trace", tname))
+ return TEST_FWD_FULL_SUPP_TRACE;
+
+ return -1;
+}
+
+static int ipv6_addr_equal(const struct in6_addr *a1, const struct in6_addr *a2)
+{
+ return ((a1->s6_addr32[0] ^ a2->s6_addr32[0]) |
+ (a1->s6_addr32[1] ^ a2->s6_addr32[1]) |
+ (a1->s6_addr32[2] ^ a2->s6_addr32[2]) |
+ (a1->s6_addr32[3] ^ a2->s6_addr32[3])) == 0;
+}
+
+static int get_u32(__u32 *val, const char *arg, int base)
+{
+ unsigned long res;
+ char *ptr;
+
+ if (!arg || !*arg)
+ return -1;
+ res = strtoul(arg, &ptr, base);
+
+ if (!ptr || ptr == arg || *ptr)
+ return -1;
+
+ if (res == ULONG_MAX && errno == ERANGE)
+ return -1;
+
+ if (res > 0xFFFFFFFFUL)
+ return -1;
+
+ *val = res;
+ return 0;
+}
+
+static int get_u16(__u16 *val, const char *arg, int base)
+{
+ unsigned long res;
+ char *ptr;
+
+ if (!arg || !*arg)
+ return -1;
+ res = strtoul(arg, &ptr, base);
+
+ if (!ptr || ptr == arg || *ptr)
+ return -1;
+
+ if (res == ULONG_MAX && errno == ERANGE)
+ return -1;
+
+ if (res > 0xFFFFUL)
+ return -1;
+
+ *val = res;
+ return 0;
+}
+
+static int (*func[__TEST_MAX])(int, struct ioam6_trace_hdr *, __u32, __u16) = {
+ [TEST_OUT_UNDEF_NS] = check_ioam_header,
+ [TEST_OUT_NO_ROOM] = check_ioam_header,
+ [TEST_OUT_BIT0] = check_ioam_header_and_data,
+ [TEST_OUT_BIT1] = check_ioam_header_and_data,
+ [TEST_OUT_BIT2] = check_ioam_header_and_data,
+ [TEST_OUT_BIT3] = check_ioam_header_and_data,
+ [TEST_OUT_BIT4] = check_ioam_header_and_data,
+ [TEST_OUT_BIT5] = check_ioam_header_and_data,
+ [TEST_OUT_BIT6] = check_ioam_header_and_data,
+ [TEST_OUT_BIT7] = check_ioam_header_and_data,
+ [TEST_OUT_BIT8] = check_ioam_header_and_data,
+ [TEST_OUT_BIT9] = check_ioam_header_and_data,
+ [TEST_OUT_BIT10] = check_ioam_header_and_data,
+ [TEST_OUT_BIT11] = check_ioam_header_and_data,
+ [TEST_OUT_BIT12] = check_ioam_header,
+ [TEST_OUT_BIT13] = check_ioam_header,
+ [TEST_OUT_BIT14] = check_ioam_header,
+ [TEST_OUT_BIT15] = check_ioam_header,
+ [TEST_OUT_BIT16] = check_ioam_header,
+ [TEST_OUT_BIT17] = check_ioam_header,
+ [TEST_OUT_BIT18] = check_ioam_header,
+ [TEST_OUT_BIT19] = check_ioam_header,
+ [TEST_OUT_BIT20] = check_ioam_header,
+ [TEST_OUT_BIT21] = check_ioam_header,
+ [TEST_OUT_BIT22] = check_ioam_header_and_data,
+ [TEST_OUT_FULL_SUPP_TRACE] = check_ioam_header_and_data,
+ [TEST_IN_UNDEF_NS] = check_ioam_header,
+ [TEST_IN_NO_ROOM] = check_ioam_header,
+ [TEST_IN_OFLAG] = check_ioam_header,
+ [TEST_IN_BIT0] = check_ioam_header_and_data,
+ [TEST_IN_BIT1] = check_ioam_header_and_data,
+ [TEST_IN_BIT2] = check_ioam_header_and_data,
+ [TEST_IN_BIT3] = check_ioam_header_and_data,
+ [TEST_IN_BIT4] = check_ioam_header_and_data,
+ [TEST_IN_BIT5] = check_ioam_header_and_data,
+ [TEST_IN_BIT6] = check_ioam_header_and_data,
+ [TEST_IN_BIT7] = check_ioam_header_and_data,
+ [TEST_IN_BIT8] = check_ioam_header_and_data,
+ [TEST_IN_BIT9] = check_ioam_header_and_data,
+ [TEST_IN_BIT10] = check_ioam_header_and_data,
+ [TEST_IN_BIT11] = check_ioam_header_and_data,
+ [TEST_IN_BIT12] = check_ioam_header,
+ [TEST_IN_BIT13] = check_ioam_header,
+ [TEST_IN_BIT14] = check_ioam_header,
+ [TEST_IN_BIT15] = check_ioam_header,
+ [TEST_IN_BIT16] = check_ioam_header,
+ [TEST_IN_BIT17] = check_ioam_header,
+ [TEST_IN_BIT18] = check_ioam_header,
+ [TEST_IN_BIT19] = check_ioam_header,
+ [TEST_IN_BIT20] = check_ioam_header,
+ [TEST_IN_BIT21] = check_ioam_header,
+ [TEST_IN_BIT22] = check_ioam_header_and_data,
+ [TEST_IN_FULL_SUPP_TRACE] = check_ioam_header_and_data,
+ [TEST_FWD_FULL_SUPP_TRACE] = check_ioam_header_and_data,
+};
+
+int main(int argc, char **argv)
+{
+ int fd, size, hoplen, tid, ret = 1;
+ struct in6_addr src, dst;
+ struct ioam6_hdr *opt;
+ struct ipv6hdr *ip6h;
+ __u8 buffer[400], *p;
+ __u16 ioam_ns;
+ __u32 tr_type;
+
+ if (argc != 7)
+ goto out;
+
+ tid = str2id(argv[2]);
+ if (tid < 0 || !func[tid])
+ goto out;
+
+ if (inet_pton(AF_INET6, argv[3], &src) != 1 ||
+ inet_pton(AF_INET6, argv[4], &dst) != 1)
+ goto out;
+
+ if (get_u32(&tr_type, argv[5], 16) ||
+ get_u16(&ioam_ns, argv[6], 0))
+ goto out;
+
+ fd = socket(AF_PACKET, SOCK_DGRAM, __cpu_to_be16(ETH_P_IPV6));
+ if (!fd)
+ goto out;
+
+ if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
+ argv[1], strlen(argv[1])))
+ goto close;
+
+recv:
+ size = recv(fd, buffer, sizeof(buffer), 0);
+ if (size <= 0)
+ goto close;
+
+ ip6h = (struct ipv6hdr *)buffer;
+
+ if (!ipv6_addr_equal(&ip6h->saddr, &src) ||
+ !ipv6_addr_equal(&ip6h->daddr, &dst))
+ goto recv;
+
+ if (ip6h->nexthdr != IPPROTO_HOPOPTS)
+ goto close;
+
+ p = buffer + sizeof(*ip6h);
+ hoplen = (p[1] + 1) << 3;
+ p += sizeof(struct ipv6_hopopt_hdr);
+
+ while (hoplen > 0) {
+ opt = (struct ioam6_hdr *)p;
+
+ if (opt->opt_type == IPV6_TLV_IOAM &&
+ opt->type == IOAM6_TYPE_PREALLOC) {
+ p += sizeof(*opt);
+ ret = func[tid](tid, (struct ioam6_trace_hdr *)p,
+ tr_type, ioam_ns);
+ break;
+ }
+
+ p += opt->opt_len + 2;
+ hoplen -= opt->opt_len + 2;
+ }
+close:
+ close(fd);
+out:
+ return ret;
+}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index f02f4de2f3a0..255793c5ac4f 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -3,8 +3,10 @@
ret=0
sin=""
+sinfail=""
sout=""
cin=""
+cinfail=""
cinsent=""
cout=""
ksft_skip=4
@@ -76,6 +78,14 @@ init()
done
}
+init_shapers()
+{
+ for i in `seq 1 4`; do
+ tc -n $ns1 qdisc add dev ns1eth$i root netem rate 20mbit delay 1
+ tc -n $ns2 qdisc add dev ns2eth$i root netem rate 20mbit delay 1
+ done
+}
+
cleanup_partial()
{
rm -f "$capout"
@@ -88,8 +98,8 @@ cleanup_partial()
cleanup()
{
- rm -f "$cin" "$cout"
- rm -f "$sin" "$sout" "$cinsent"
+ rm -f "$cin" "$cout" "$sinfail"
+ rm -f "$sin" "$sout" "$cinsent" "$cinfail"
cleanup_partial
}
@@ -211,11 +221,15 @@ link_failure()
{
ns="$1"
- l=$((RANDOM%4))
- l=$((l+1))
+ if [ -z "$FAILING_LINKS" ]; then
+ l=$((RANDOM%4))
+ FAILING_LINKS=$((l+1))
+ fi
- veth="ns1eth$l"
- ip -net "$ns" link set "$veth" down
+ for l in $FAILING_LINKS; do
+ veth="ns1eth$l"
+ ip -net "$ns" link set "$veth" down
+ done
}
# $1: IP address
@@ -280,10 +294,17 @@ do_transfer()
local_addr="0.0.0.0"
fi
- timeout ${timeout_test} \
- ip netns exec ${listener_ns} \
- $mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
- ${local_addr} < "$sin" > "$sout" &
+ if [ "$test_link_fail" -eq 2 ];then
+ timeout ${timeout_test} \
+ ip netns exec ${listener_ns} \
+ $mptcp_connect -t ${timeout_poll} -l -p $port -s ${cl_proto} \
+ ${local_addr} < "$sinfail" > "$sout" &
+ else
+ timeout ${timeout_test} \
+ ip netns exec ${listener_ns} \
+ $mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
+ ${local_addr} < "$sin" > "$sout" &
+ fi
spid=$!
sleep 1
@@ -294,7 +315,7 @@ do_transfer()
$mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
$connect_addr < "$cin" > "$cout" &
else
- ( cat "$cin" ; sleep 2; link_failure $listener_ns ; cat "$cin" ) | \
+ ( cat "$cinfail" ; sleep 2; link_failure $listener_ns ; cat "$cinfail" ) | \
tee "$cinsent" | \
timeout ${timeout_test} \
ip netns exec ${connector_ns} \
@@ -323,17 +344,18 @@ do_transfer()
let rm_nr_ns1=-addr_nr_ns1
if [ $rm_nr_ns1 -lt 8 ]; then
counter=1
+ pos=1
dump=(`ip netns exec ${listener_ns} ./pm_nl_ctl dump`)
if [ ${#dump[@]} -gt 0 ]; then
- id=${dump[1]}
sleep 1
while [ $counter -le $rm_nr_ns1 ]
do
+ id=${dump[$pos]}
ip netns exec ${listener_ns} ./pm_nl_ctl del $id
sleep 1
let counter+=1
- let id+=1
+ let pos+=5
done
fi
elif [ $rm_nr_ns1 -eq 8 ]; then
@@ -345,6 +367,12 @@ do_transfer()
fi
fi
+ flags="subflow"
+ if [[ "${addr_nr_ns2}" = "fullmesh_"* ]]; then
+ flags="${flags},fullmesh"
+ addr_nr_ns2=${addr_nr_ns2:9}
+ fi
+
if [ $addr_nr_ns2 -gt 0 ]; then
let add_nr_ns2=addr_nr_ns2
counter=3
@@ -356,7 +384,7 @@ do_transfer()
else
addr="10.0.$counter.2"
fi
- ip netns exec $ns2 ./pm_nl_ctl add $addr flags subflow
+ ip netns exec $ns2 ./pm_nl_ctl add $addr flags $flags
let counter+=1
let add_nr_ns2-=1
done
@@ -365,17 +393,18 @@ do_transfer()
let rm_nr_ns2=-addr_nr_ns2
if [ $rm_nr_ns2 -lt 8 ]; then
counter=1
+ pos=1
dump=(`ip netns exec ${connector_ns} ./pm_nl_ctl dump`)
if [ ${#dump[@]} -gt 0 ]; then
- id=${dump[1]}
sleep 1
while [ $counter -le $rm_nr_ns2 ]
do
+ id=${dump[$pos]}
ip netns exec ${connector_ns} ./pm_nl_ctl del $id
sleep 1
let counter+=1
- let id+=1
+ let pos+=5
done
fi
elif [ $rm_nr_ns2 -eq 8 ]; then
@@ -434,7 +463,11 @@ do_transfer()
return 1
fi
- check_transfer $sin $cout "file received by client"
+ if [ "$test_link_fail" -eq 2 ];then
+ check_transfer $sinfail $cout "file received by client"
+ else
+ check_transfer $sin $cout "file received by client"
+ fi
retc=$?
if [ "$test_link_fail" -eq 0 ];then
check_transfer $cin $sout "file received by server"
@@ -477,29 +510,33 @@ run_tests()
lret=0
oldin=""
- if [ "$test_linkfail" -eq 1 ];then
- size=$((RANDOM%1024))
+ # create the input file for the failure test when
+ # the first failure test run
+ if [ "$test_linkfail" -ne 0 -a -z "$cinfail" ]; then
+ # the client file must be considerably larger
+ # of the maximum expected cwin value, or the
+ # link utilization will be not predicable
+ size=$((RANDOM%2))
size=$((size+1))
- size=$((size*128))
+ size=$((size*8192))
+ size=$((size + ( $RANDOM % 8192) ))
- oldin=$(mktemp)
- cp "$cin" "$oldin"
- make_file "$cin" "client" $size
+ cinfail=$(mktemp)
+ make_file "$cinfail" "client" $size
fi
- do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} \
- ${test_linkfail} ${addr_nr_ns1} ${addr_nr_ns2} ${speed} ${bkup}
- lret=$?
+ if [ "$test_linkfail" -eq 2 -a -z "$sinfail" ]; then
+ size=$((RANDOM%16))
+ size=$((size+1))
+ size=$((size*2048))
- if [ "$test_linkfail" -eq 1 ];then
- cp "$oldin" "$cin"
- rm -f "$oldin"
+ sinfail=$(mktemp)
+ make_file "$sinfail" "server" $size
fi
- if [ $lret -ne 0 ]; then
- ret=$lret
- return
- fi
+ do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} \
+ ${test_linkfail} ${addr_nr_ns1} ${addr_nr_ns2} ${speed} ${bkup}
+ lret=$?
}
chk_csum_nr()
@@ -541,6 +578,43 @@ chk_csum_nr()
fi
}
+chk_fail_nr()
+{
+ local mp_fail_nr_tx=$1
+ local mp_fail_nr_rx=$2
+ local count
+ local dump_stats
+
+ printf "%-39s %s" " " "ftx"
+ count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPFailTx | awk '{print $2}'`
+ [ -z "$count" ] && count=0
+ if [ "$count" != "$mp_fail_nr_tx" ]; then
+ echo "[fail] got $count MP_FAIL[s] TX expected $mp_fail_nr_tx"
+ ret=1
+ dump_stats=1
+ else
+ echo -n "[ ok ]"
+ fi
+
+ echo -n " - frx "
+ count=`ip netns exec $ns2 nstat -as | grep MPTcpExtMPFailRx | awk '{print $2}'`
+ [ -z "$count" ] && count=0
+ if [ "$count" != "$mp_fail_nr_rx" ]; then
+ echo "[fail] got $count MP_FAIL[s] RX expected $mp_fail_nr_rx"
+ ret=1
+ dump_stats=1
+ else
+ echo "[ ok ]"
+ fi
+
+ if [ "${dump_stats}" = 1 ]; then
+ echo Server ns stats
+ ip netns exec $ns1 nstat -as | grep MPTcp
+ echo Client ns stats
+ ip netns exec $ns2 nstat -as | grep MPTcp
+ fi
+}
+
chk_join_nr()
{
local msg="$1"
@@ -590,6 +664,47 @@ chk_join_nr()
fi
if [ $checksum -eq 1 ]; then
chk_csum_nr
+ chk_fail_nr 0 0
+ fi
+}
+
+# a negative value for 'stale_max' means no upper bound:
+# for bidirectional transfer, if one peer sleep for a while
+# - as these tests do - we can have a quite high number of
+# stale/recover conversions, proportional to
+# sleep duration/ MPTCP-level RTX interval.
+chk_stale_nr()
+{
+ local ns=$1
+ local stale_min=$2
+ local stale_max=$3
+ local stale_delta=$4
+ local dump_stats
+ local stale_nr
+ local recover_nr
+
+ printf "%-39s %-18s" " " "stale"
+ stale_nr=`ip netns exec $ns nstat -as | grep MPTcpExtSubflowStale | awk '{print $2}'`
+ [ -z "$stale_nr" ] && stale_nr=0
+ recover_nr=`ip netns exec $ns nstat -as | grep MPTcpExtSubflowRecover | awk '{print $2}'`
+ [ -z "$recover_nr" ] && recover_nr=0
+
+ if [ $stale_nr -lt $stale_min ] ||
+ [ $stale_max -gt 0 -a $stale_nr -gt $stale_max ] ||
+ [ $((stale_nr - $recover_nr)) -ne $stale_delta ]; then
+ echo "[fail] got $stale_nr stale[s] $recover_nr recover[s], " \
+ " expected stale in range [$stale_min..$stale_max]," \
+ " stale-recover delta $stale_delta "
+ ret=1
+ dump_stats=1
+ else
+ echo "[ ok ]"
+ fi
+
+ if [ "${dump_stats}" = 1 ]; then
+ echo $ns stats
+ ip netns exec $ns ip -s link show
+ ip netns exec $ns nstat -as | grep MPTcp
fi
}
@@ -801,6 +916,27 @@ chk_prio_nr()
fi
}
+chk_link_usage()
+{
+ local ns=$1
+ local link=$2
+ local out=$3
+ local expected_rate=$4
+ local tx_link=`ip netns exec $ns cat /sys/class/net/$link/statistics/tx_bytes`
+ local tx_total=`ls -l $out | awk '{print $5}'`
+ local tx_rate=$((tx_link * 100 / $tx_total))
+ local tolerance=5
+
+ printf "%-39s %-18s" " " "link usage"
+ if [ $tx_rate -lt $((expected_rate - $tolerance)) -o \
+ $tx_rate -gt $((expected_rate + $tolerance)) ]; then
+ echo "[fail] got $tx_rate% usage, expected $expected_rate%"
+ ret=1
+ else
+ echo "[ ok ]"
+ fi
+}
+
subflows_tests()
{
reset
@@ -918,20 +1054,101 @@ signal_address_tests()
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "signal invalid addresses" 1 1 1
chk_add_nr 3 3
+
+ # signal addresses race test
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 4 4
+ ip netns exec $ns2 ./pm_nl_ctl limits 4 4
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1 flags signal
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.4.1 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.1.2 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags signal
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_add_nr 4 4
}
link_failure_tests()
{
# accept and use add_addr with additional subflows and link loss
reset
+
+ # without any b/w limit each veth could spool the packets and get
+ # them acked at xmit time, so that the corresponding subflow will
+ # have almost always no outstanding pkts, the scheduler will pick
+ # always the first subflow and we will have hard time testing
+ # active backup and link switch-over.
+ # Let's set some arbitrary (low) virtual link limits.
+ init_shapers
ip netns exec $ns1 ./pm_nl_ctl limits 0 3
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal
ip netns exec $ns2 ./pm_nl_ctl limits 1 3
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 dev ns2eth4 flags subflow
run_tests $ns1 $ns2 10.0.1.1 1
chk_join_nr "multiple flows, signal, link failure" 3 3 3
chk_add_nr 1 1
+ chk_stale_nr $ns2 1 5 1
+
+ # accept and use add_addr with additional subflows and link loss
+ # for bidirectional transfer
+ reset
+ init_shapers
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 3
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 3
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 dev ns2eth4 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1 2
+ chk_join_nr "multi flows, signal, bidi, link fail" 3 3 3
+ chk_add_nr 1 1
+ chk_stale_nr $ns2 1 -1 1
+
+ # 2 subflows plus 1 backup subflow with a lossy link, backup
+ # will never be used
+ reset
+ init_shapers
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 2
+ export FAILING_LINKS="1"
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow,backup
+ run_tests $ns1 $ns2 10.0.1.1 1
+ chk_join_nr "backup subflow unused, link failure" 2 2 2
+ chk_add_nr 1 1
+ chk_link_usage $ns2 ns2eth3 $cinsent 0
+
+ # 2 lossy links after half transfer, backup will get half of
+ # the traffic
+ reset
+ init_shapers
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 2
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow,backup
+ export FAILING_LINKS="1 2"
+ run_tests $ns1 $ns2 10.0.1.1 1
+ chk_join_nr "backup flow used, multi links fail" 2 2 2
+ chk_add_nr 1 1
+ chk_stale_nr $ns2 2 4 2
+ chk_link_usage $ns2 ns2eth3 $cinsent 50
+
+ # use a backup subflow with the first subflow on a lossy link
+ # for bidirectional transfer
+ reset
+ init_shapers
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 3
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow,backup
+ run_tests $ns1 $ns2 10.0.1.1 2
+ chk_join_nr "backup flow used, bidi, link failure" 2 2 2
+ chk_add_nr 1 1
+ chk_stale_nr $ns2 1 -1 2
+ chk_link_usage $ns2 ns2eth3 $cinsent 50
}
add_addr_timeout_tests()
@@ -1530,6 +1747,55 @@ deny_join_id0_tests()
chk_join_nr "subflow and address allow join id0 2" 1 1 1
}
+fullmesh_tests()
+{
+ # fullmesh 1
+ # 2 fullmesh addrs in ns2, added before the connection,
+ # 1 non-fullmesh addr in ns1, added during the connection.
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 4
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 4
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow,fullmesh
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow,fullmesh
+ run_tests $ns1 $ns2 10.0.1.1 0 1 0 slow
+ chk_join_nr "fullmesh test 2x1" 4 4 4
+ chk_add_nr 1 1
+
+ # fullmesh 2
+ # 1 non-fullmesh addr in ns1, added before the connection,
+ # 1 fullmesh addr in ns2, added during the connection.
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 1 3
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 3
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_1 slow
+ chk_join_nr "fullmesh test 1x1" 3 3 3
+ chk_add_nr 1 1
+
+ # fullmesh 3
+ # 1 non-fullmesh addr in ns1, added before the connection,
+ # 2 fullmesh addrs in ns2, added during the connection.
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 2 5
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 5
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_2 slow
+ chk_join_nr "fullmesh test 1x2" 5 5 5
+ chk_add_nr 1 1
+
+ # fullmesh 4
+ # 1 non-fullmesh addr in ns1, added before the connection,
+ # 2 fullmesh addrs in ns2, added during the connection,
+ # limit max_subflows to 4.
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 2 4
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 4
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_2 slow
+ chk_join_nr "fullmesh test 1x2, limited" 4 4 4
+ chk_add_nr 1 1
+}
+
all_tests()
{
subflows_tests
@@ -1545,6 +1811,7 @@ all_tests()
syncookies_tests
checksum_tests
deny_join_id0_tests
+ fullmesh_tests
}
usage()
@@ -1563,6 +1830,7 @@ usage()
echo " -k syncookies_tests"
echo " -S checksum_tests"
echo " -d deny_join_id0_tests"
+ echo " -m fullmesh_tests"
echo " -c capture pcap files"
echo " -C enable data checksum"
echo " -h help"
@@ -1598,7 +1866,7 @@ if [ $do_all_tests -eq 1 ]; then
exit $ret
fi
-while getopts 'fsltra64bpkdchCS' opt; do
+while getopts 'fsltra64bpkdmchCS' opt; do
case $opt in
f)
subflows_tests
@@ -1639,6 +1907,9 @@ while getopts 'fsltra64bpkdchCS' opt; do
d)
deny_join_id0_tests
;;
+ m)
+ fullmesh_tests
+ ;;
c)
;;
C)
diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
index 115decfdc1ef..354784512748 100644
--- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
+++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
@@ -25,7 +25,7 @@
static void syntax(char *argv[])
{
fprintf(stderr, "%s add|get|set|del|flush|dump|accept [<args>]\n", argv[0]);
- fprintf(stderr, "\tadd [flags signal|subflow|backup] [id <nr>] [dev <name>] <ip>\n");
+ fprintf(stderr, "\tadd [flags signal|subflow|backup|fullmesh] [id <nr>] [dev <name>] <ip>\n");
fprintf(stderr, "\tdel <id> [<ip>]\n");
fprintf(stderr, "\tget <id>\n");
fprintf(stderr, "\tset <ip> [flags backup|nobackup]\n");
@@ -236,11 +236,18 @@ int add_addr(int fd, int pm_family, int argc, char *argv[])
flags |= MPTCP_PM_ADDR_FLAG_SIGNAL;
else if (!strcmp(tok, "backup"))
flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
+ else if (!strcmp(tok, "fullmesh"))
+ flags |= MPTCP_PM_ADDR_FLAG_FULLMESH;
else
error(1, errno,
"unknown flag %s", argv[arg]);
}
+ if (flags & MPTCP_PM_ADDR_FLAG_SIGNAL &&
+ flags & MPTCP_PM_ADDR_FLAG_FULLMESH) {
+ error(1, errno, "error flag fullmesh");
+ }
+
rta = (void *)(data + off);
rta->rta_type = MPTCP_PM_ADDR_ATTR_FLAGS;
rta->rta_len = RTA_LENGTH(4);
@@ -422,6 +429,13 @@ static void print_addr(struct rtattr *attrs, int len)
printf(",");
}
+ if (flags & MPTCP_PM_ADDR_FLAG_FULLMESH) {
+ printf("fullmesh");
+ flags &= ~MPTCP_PM_ADDR_FLAG_FULLMESH;
+ if (flags)
+ printf(",");
+ }
+
/* bump unknown flags, if any */
if (flags)
printf("0x%x", flags);
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
index fd63ebfe9a2b..910d8126af8f 100755
--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -22,8 +22,8 @@ usage() {
cleanup()
{
- rm -f "$cin" "$cout"
- rm -f "$sin" "$sout"
+ rm -f "$cout" "$sout"
+ rm -f "$large" "$small"
rm -f "$capout"
local netns
diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c
index db4521335722..3653d6468c67 100644
--- a/tools/testing/selftests/net/psock_fanout.c
+++ b/tools/testing/selftests/net/psock_fanout.c
@@ -111,8 +111,8 @@ static int sock_fanout_open(uint16_t typeflags, uint16_t group_id)
static void sock_fanout_set_cbpf(int fd)
{
struct sock_filter bpf_filter[] = {
- BPF_STMT(BPF_LD+BPF_B+BPF_ABS, 80), /* ldb [80] */
- BPF_STMT(BPF_RET+BPF_A, 0), /* ret A */
+ BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 80), /* ldb [80] */
+ BPF_STMT(BPF_RET | BPF_A, 0), /* ret A */
};
struct sock_fprog bpf_prog;
diff --git a/tools/testing/selftests/net/psock_snd.sh b/tools/testing/selftests/net/psock_snd.sh
index 170be65e0816..1cbfeb5052ec 100755
--- a/tools/testing/selftests/net/psock_snd.sh
+++ b/tools/testing/selftests/net/psock_snd.sh
@@ -86,9 +86,6 @@ echo "raw truncate hlen - 1 (expected to fail: EINVAL)"
echo "raw gso min size"
./in_netns.sh ./psock_snd -v -c -g -l "${mss_exceeds}"
-echo "raw gso min size - 1 (expected to fail)"
-(! ./in_netns.sh ./psock_snd -v -c -g -l "${mss}")
-
echo "raw gso max size"
./in_netns.sh ./psock_snd -v -c -g -l "${max_mss}"
diff --git a/tools/testing/selftests/net/run_afpackettests b/tools/testing/selftests/net/run_afpackettests
index 8b42e8b04e0f..a59cb6a3c4f5 100755
--- a/tools/testing/selftests/net/run_afpackettests
+++ b/tools/testing/selftests/net/run_afpackettests
@@ -1,9 +1,12 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
if [ $(id -u) != 0 ]; then
echo $msg must be run as root >&2
- exit 0
+ exit $ksft_skip
fi
ret=0
diff --git a/tools/testing/selftests/net/setup_loopback.sh b/tools/testing/selftests/net/setup_loopback.sh
new file mode 100755
index 000000000000..e57bbfbc5208
--- /dev/null
+++ b/tools/testing/selftests/net/setup_loopback.sh
@@ -0,0 +1,118 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+readonly FLUSH_PATH="/sys/class/net/${dev}/gro_flush_timeout"
+readonly IRQ_PATH="/sys/class/net/${dev}/napi_defer_hard_irqs"
+readonly FLUSH_TIMEOUT="$(< ${FLUSH_PATH})"
+readonly HARD_IRQS="$(< ${IRQ_PATH})"
+
+netdev_check_for_carrier() {
+ local -r dev="$1"
+
+ for i in {1..5}; do
+ carrier="$(cat /sys/class/net/${dev}/carrier)"
+ if [[ "${carrier}" -ne 1 ]] ; then
+ echo "carrier not ready yet..." >&2
+ sleep 1
+ else
+ echo "carrier ready" >&2
+ break
+ fi
+ done
+ echo "${carrier}"
+}
+
+# Assumes that there is no existing ipvlan device on the physical device
+setup_loopback_environment() {
+ local dev="$1"
+
+ # Fail hard if cannot turn on loopback mode for current NIC
+ ethtool -K "${dev}" loopback on || exit 1
+ sleep 1
+
+ # Check for the carrier
+ carrier=$(netdev_check_for_carrier ${dev})
+ if [[ "${carrier}" -ne 1 ]] ; then
+ echo "setup_loopback_environment failed"
+ exit 1
+ fi
+}
+
+setup_macvlan_ns(){
+ local -r link_dev="$1"
+ local -r ns_name="$2"
+ local -r ns_dev="$3"
+ local -r ns_mac="$4"
+ local -r addr="$5"
+
+ ip link add link "${link_dev}" dev "${ns_dev}" \
+ address "${ns_mac}" type macvlan
+ exit_code=$?
+ if [[ "${exit_code}" -ne 0 ]]; then
+ echo "setup_macvlan_ns failed"
+ exit $exit_code
+ fi
+
+ [[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}"
+ ip link set dev "${ns_dev}" netns "${ns_name}"
+ ip -netns "${ns_name}" link set dev "${ns_dev}" up
+ if [[ -n "${addr}" ]]; then
+ ip -netns "${ns_name}" addr add dev "${ns_dev}" "${addr}"
+ fi
+
+ sleep 1
+}
+
+cleanup_macvlan_ns(){
+ while (( $# >= 2 )); do
+ ns_name="$1"
+ ns_dev="$2"
+ ip -netns "${ns_name}" link del dev "${ns_dev}"
+ ip netns del "${ns_name}"
+ shift 2
+ done
+}
+
+cleanup_loopback(){
+ local -r dev="$1"
+
+ ethtool -K "${dev}" loopback off
+ sleep 1
+
+ # Check for the carrier
+ carrier=$(netdev_check_for_carrier ${dev})
+ if [[ "${carrier}" -ne 1 ]] ; then
+ echo "setup_loopback_environment failed"
+ exit 1
+ fi
+}
+
+setup_interrupt() {
+ # Use timer on host to trigger the network stack
+ # Also disable device interrupt to not depend on NIC interrupt
+ # Reduce test flakiness caused by unexpected interrupts
+ echo 100000 >"${FLUSH_PATH}"
+ echo 50 >"${IRQ_PATH}"
+}
+
+setup_ns() {
+ # Set up server_ns namespace and client_ns namespace
+ setup_macvlan_ns "${dev}" server_ns server "${SERVER_MAC}"
+ setup_macvlan_ns "${dev}" client_ns client "${CLIENT_MAC}"
+}
+
+cleanup_ns() {
+ cleanup_macvlan_ns server_ns server client_ns client
+}
+
+setup() {
+ setup_loopback_environment "${dev}"
+ setup_interrupt
+}
+
+cleanup() {
+ cleanup_loopback "${dev}"
+
+ echo "${FLUSH_TIMEOUT}" >"${FLUSH_PATH}"
+ echo "${HARD_IRQS}" >"${IRQ_PATH}"
+}
diff --git a/tools/testing/selftests/net/setup_veth.sh b/tools/testing/selftests/net/setup_veth.sh
new file mode 100644
index 000000000000..1003ddf7b3b2
--- /dev/null
+++ b/tools/testing/selftests/net/setup_veth.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+setup_veth_ns() {
+ local -r link_dev="$1"
+ local -r ns_name="$2"
+ local -r ns_dev="$3"
+ local -r ns_mac="$4"
+
+ [[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}"
+ echo 100000 > "/sys/class/net/${ns_dev}/gro_flush_timeout"
+ ip link set dev "${ns_dev}" netns "${ns_name}" mtu 65535
+ ip -netns "${ns_name}" link set dev "${ns_dev}" up
+
+ ip netns exec "${ns_name}" ethtool -K "${ns_dev}" gro on tso off
+}
+
+setup_ns() {
+ # Set up server_ns namespace and client_ns namespace
+ ip link add name server type veth peer name client
+
+ setup_veth_ns "${dev}" server_ns server "${SERVER_MAC}"
+ setup_veth_ns "${dev}" client_ns client "${CLIENT_MAC}"
+}
+
+cleanup_ns() {
+ local ns_name
+
+ for ns_name in client_ns server_ns; do
+ [[ -e /var/run/netns/"${ns_name}" ]] && ip netns del "${ns_name}"
+ done
+}
+
+setup() {
+ # no global init setup step needed
+ :
+}
+
+cleanup() {
+ cleanup_ns
+}
diff --git a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
index 75ada17ac061..aebaab8ce44c 100755
--- a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
@@ -193,6 +193,9 @@
# +---------------------------------------------------+
#
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
readonly LOCALSID_TABLE_ID=90
readonly IPv6_RT_NETWORK=fd00
readonly IPv6_HS_NETWORK=cafe
@@ -543,18 +546,18 @@ host_vpn_isolation_tests()
if [ "$(id -u)" -ne 0 ];then
echo "SKIP: Need root privileges"
- exit 0
+ exit $ksft_skip
fi
if [ ! -x "$(command -v ip)" ]; then
echo "SKIP: Could not run test without ip tool"
- exit 0
+ exit $ksft_skip
fi
modprobe vrf &>/dev/null
if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
echo "SKIP: vrf sysctl does not exist"
- exit 0
+ exit $ksft_skip
fi
cleanup &>/dev/null
diff --git a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
index ad7a9fc59934..1003119773e5 100755
--- a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
@@ -163,6 +163,9 @@
# +---------------------------------------------------+
#
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
readonly LOCALSID_TABLE_ID=90
readonly IPv6_RT_NETWORK=fd00
readonly IPv4_HS_NETWORK=10.0.0
@@ -464,18 +467,18 @@ host_vpn_isolation_tests()
if [ "$(id -u)" -ne 0 ];then
echo "SKIP: Need root privileges"
- exit 0
+ exit $ksft_skip
fi
if [ ! -x "$(command -v ip)" ]; then
echo "SKIP: Could not run test without ip tool"
- exit 0
+ exit $ksft_skip
fi
modprobe vrf &>/dev/null
if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
echo "SKIP: vrf sysctl does not exist"
- exit 0
+ exit $ksft_skip
fi
cleanup &>/dev/null
diff --git a/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh
index 68708f5e26a0..b9b06ef80d88 100755
--- a/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh
@@ -164,6 +164,9 @@
# +---------------------------------------------------+
#
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
readonly LOCALSID_TABLE_ID=90
readonly IPv6_RT_NETWORK=fd00
readonly IPv6_HS_NETWORK=cafe
@@ -472,18 +475,18 @@ host_vpn_isolation_tests()
if [ "$(id -u)" -ne 0 ];then
echo "SKIP: Need root privileges"
- exit 0
+ exit $ksft_skip
fi
if [ ! -x "$(command -v ip)" ]; then
echo "SKIP: Could not run test without ip tool"
- exit 0
+ exit $ksft_skip
fi
modprobe vrf &>/dev/null
if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
echo "SKIP: vrf sysctl does not exist"
- exit 0
+ exit $ksft_skip
fi
cleanup &>/dev/null
diff --git a/tools/testing/selftests/net/toeplitz.c b/tools/testing/selftests/net/toeplitz.c
new file mode 100644
index 000000000000..710ac956bdb3
--- /dev/null
+++ b/tools/testing/selftests/net/toeplitz.c
@@ -0,0 +1,585 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Toeplitz test
+ *
+ * 1. Read packets and their rx_hash using PF_PACKET/TPACKET_V3
+ * 2. Compute the rx_hash in software based on the packet contents
+ * 3. Compare the two
+ *
+ * Optionally, either '-C $rx_irq_cpu_list' or '-r $rps_bitmap' may be given.
+ *
+ * If '-C $rx_irq_cpu_list' is given, also
+ *
+ * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU
+ * 5. Compute the rxqueue that RSS would select based on this rx_hash
+ * 6. Using the $rx_irq_cpu_list map, identify the arriving cpu based on rxq irq
+ * 7. Compare the cpus from 4 and 6
+ *
+ * Else if '-r $rps_bitmap' is given, also
+ *
+ * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU
+ * 5. Compute the cpu that RPS should select based on rx_hash and $rps_bitmap
+ * 6. Compare the cpus from 4 and 5
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <linux/filter.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <net/if.h>
+#include <netdb.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/sysinfo.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#define TOEPLITZ_KEY_MIN_LEN 40
+#define TOEPLITZ_KEY_MAX_LEN 60
+
+#define TOEPLITZ_STR_LEN(K) (((K) * 3) - 1) /* hex encoded: AA:BB:CC:...:ZZ */
+#define TOEPLITZ_STR_MIN_LEN TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MIN_LEN)
+#define TOEPLITZ_STR_MAX_LEN TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MAX_LEN)
+
+#define FOUR_TUPLE_MAX_LEN ((sizeof(struct in6_addr) * 2) + (sizeof(uint16_t) * 2))
+
+#define RSS_MAX_CPUS (1 << 16) /* real constraint is PACKET_FANOUT_MAX */
+
+#define RPS_MAX_CPUS 16UL /* must be a power of 2 */
+
+/* configuration options (cmdline arguments) */
+static uint16_t cfg_dport = 8000;
+static int cfg_family = AF_INET6;
+static char *cfg_ifname = "eth0";
+static int cfg_num_queues;
+static int cfg_num_rps_cpus;
+static bool cfg_sink;
+static int cfg_type = SOCK_STREAM;
+static int cfg_timeout_msec = 1000;
+static bool cfg_verbose;
+
+/* global vars */
+static int num_cpus;
+static int ring_block_nr;
+static int ring_block_sz;
+
+/* stats */
+static int frames_received;
+static int frames_nohash;
+static int frames_error;
+
+#define log_verbose(args...) do { if (cfg_verbose) fprintf(stderr, args); } while (0)
+
+/* tpacket ring */
+struct ring_state {
+ int fd;
+ char *mmap;
+ int idx;
+ int cpu;
+};
+
+static unsigned int rx_irq_cpus[RSS_MAX_CPUS]; /* map from rxq to cpu */
+static int rps_silo_to_cpu[RPS_MAX_CPUS];
+static unsigned char toeplitz_key[TOEPLITZ_KEY_MAX_LEN];
+static struct ring_state rings[RSS_MAX_CPUS];
+
+static inline uint32_t toeplitz(const unsigned char *four_tuple,
+ const unsigned char *key)
+{
+ int i, bit, ret = 0;
+ uint32_t key32;
+
+ key32 = ntohl(*((uint32_t *)key));
+ key += 4;
+
+ for (i = 0; i < FOUR_TUPLE_MAX_LEN; i++) {
+ for (bit = 7; bit >= 0; bit--) {
+ if (four_tuple[i] & (1 << bit))
+ ret ^= key32;
+
+ key32 <<= 1;
+ key32 |= !!(key[0] & (1 << bit));
+ }
+ key++;
+ }
+
+ return ret;
+}
+
+/* Compare computed cpu with arrival cpu from packet_fanout_cpu */
+static void verify_rss(uint32_t rx_hash, int cpu)
+{
+ int queue = rx_hash % cfg_num_queues;
+
+ log_verbose(" rxq %d (cpu %d)", queue, rx_irq_cpus[queue]);
+ if (rx_irq_cpus[queue] != cpu) {
+ log_verbose(". error: rss cpu mismatch (%d)", cpu);
+ frames_error++;
+ }
+}
+
+static void verify_rps(uint64_t rx_hash, int cpu)
+{
+ int silo = (rx_hash * cfg_num_rps_cpus) >> 32;
+
+ log_verbose(" silo %d (cpu %d)", silo, rps_silo_to_cpu[silo]);
+ if (rps_silo_to_cpu[silo] != cpu) {
+ log_verbose(". error: rps cpu mismatch (%d)", cpu);
+ frames_error++;
+ }
+}
+
+static void log_rxhash(int cpu, uint32_t rx_hash,
+ const char *addrs, int addr_len)
+{
+ char saddr[INET6_ADDRSTRLEN], daddr[INET6_ADDRSTRLEN];
+ uint16_t *ports;
+
+ if (!inet_ntop(cfg_family, addrs, saddr, sizeof(saddr)) ||
+ !inet_ntop(cfg_family, addrs + addr_len, daddr, sizeof(daddr)))
+ error(1, 0, "address parse error");
+
+ ports = (void *)addrs + (addr_len * 2);
+ log_verbose("cpu %d: rx_hash 0x%08x [saddr %s daddr %s sport %02hu dport %02hu]",
+ cpu, rx_hash, saddr, daddr,
+ ntohs(ports[0]), ntohs(ports[1]));
+}
+
+/* Compare computed rxhash with rxhash received from tpacket_v3 */
+static void verify_rxhash(const char *pkt, uint32_t rx_hash, int cpu)
+{
+ unsigned char four_tuple[FOUR_TUPLE_MAX_LEN] = {0};
+ uint32_t rx_hash_sw;
+ const char *addrs;
+ int addr_len;
+
+ if (cfg_family == AF_INET) {
+ addr_len = sizeof(struct in_addr);
+ addrs = pkt + offsetof(struct iphdr, saddr);
+ } else {
+ addr_len = sizeof(struct in6_addr);
+ addrs = pkt + offsetof(struct ip6_hdr, ip6_src);
+ }
+
+ memcpy(four_tuple, addrs, (addr_len * 2) + (sizeof(uint16_t) * 2));
+ rx_hash_sw = toeplitz(four_tuple, toeplitz_key);
+
+ if (cfg_verbose)
+ log_rxhash(cpu, rx_hash, addrs, addr_len);
+
+ if (rx_hash != rx_hash_sw) {
+ log_verbose(" != expected 0x%x\n", rx_hash_sw);
+ frames_error++;
+ return;
+ }
+
+ log_verbose(" OK");
+ if (cfg_num_queues)
+ verify_rss(rx_hash, cpu);
+ else if (cfg_num_rps_cpus)
+ verify_rps(rx_hash, cpu);
+ log_verbose("\n");
+}
+
+static char *recv_frame(const struct ring_state *ring, char *frame)
+{
+ struct tpacket3_hdr *hdr = (void *)frame;
+
+ if (hdr->hv1.tp_rxhash)
+ verify_rxhash(frame + hdr->tp_net, hdr->hv1.tp_rxhash,
+ ring->cpu);
+ else
+ frames_nohash++;
+
+ return frame + hdr->tp_next_offset;
+}
+
+/* A single TPACKET_V3 block can hold multiple frames */
+static void recv_block(struct ring_state *ring)
+{
+ struct tpacket_block_desc *block;
+ char *frame;
+ int i;
+
+ block = (void *)(ring->mmap + ring->idx * ring_block_sz);
+ if (!(block->hdr.bh1.block_status & TP_STATUS_USER))
+ return;
+
+ frame = (char *)block;
+ frame += block->hdr.bh1.offset_to_first_pkt;
+
+ for (i = 0; i < block->hdr.bh1.num_pkts; i++) {
+ frame = recv_frame(ring, frame);
+ frames_received++;
+ }
+
+ block->hdr.bh1.block_status = TP_STATUS_KERNEL;
+ ring->idx = (ring->idx + 1) % ring_block_nr;
+}
+
+/* simple test: sleep once unconditionally and then process all rings */
+static void process_rings(void)
+{
+ int i;
+
+ usleep(1000 * cfg_timeout_msec);
+
+ for (i = 0; i < num_cpus; i++)
+ recv_block(&rings[i]);
+
+ fprintf(stderr, "count: pass=%u nohash=%u fail=%u\n",
+ frames_received - frames_nohash - frames_error,
+ frames_nohash, frames_error);
+}
+
+static char *setup_ring(int fd)
+{
+ struct tpacket_req3 req3 = {0};
+ void *ring;
+
+ req3.tp_retire_blk_tov = cfg_timeout_msec;
+ req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
+
+ req3.tp_frame_size = 2048;
+ req3.tp_frame_nr = 1 << 10;
+ req3.tp_block_nr = 2;
+
+ req3.tp_block_size = req3.tp_frame_size * req3.tp_frame_nr;
+ req3.tp_block_size /= req3.tp_block_nr;
+
+ if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req3, sizeof(req3)))
+ error(1, errno, "setsockopt PACKET_RX_RING");
+
+ ring_block_sz = req3.tp_block_size;
+ ring_block_nr = req3.tp_block_nr;
+
+ ring = mmap(0, req3.tp_block_size * req3.tp_block_nr,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_LOCKED | MAP_POPULATE, fd, 0);
+ if (ring == MAP_FAILED)
+ error(1, 0, "mmap failed");
+
+ return ring;
+}
+
+static void __set_filter(int fd, int off_proto, uint8_t proto, int off_dport)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4),
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, off_proto),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, proto, 0, 2),
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, off_dport),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_dport, 1, 0),
+ BPF_STMT(BPF_RET + BPF_K, 0),
+ BPF_STMT(BPF_RET + BPF_K, 0xFFFF),
+ };
+ struct sock_fprog prog = {};
+
+ prog.filter = filter;
+ prog.len = sizeof(filter) / sizeof(struct sock_filter);
+ if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)))
+ error(1, errno, "setsockopt filter");
+}
+
+/* filter on transport protocol and destination port */
+static void set_filter(int fd)
+{
+ const int off_dport = offsetof(struct tcphdr, dest); /* same for udp */
+ uint8_t proto;
+
+ proto = cfg_type == SOCK_STREAM ? IPPROTO_TCP : IPPROTO_UDP;
+ if (cfg_family == AF_INET)
+ __set_filter(fd, offsetof(struct iphdr, protocol), proto,
+ sizeof(struct iphdr) + off_dport);
+ else
+ __set_filter(fd, offsetof(struct ip6_hdr, ip6_nxt), proto,
+ sizeof(struct ip6_hdr) + off_dport);
+}
+
+/* drop everything: used temporarily during setup */
+static void set_filter_null(int fd)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET + BPF_K, 0),
+ };
+ struct sock_fprog prog = {};
+
+ prog.filter = filter;
+ prog.len = sizeof(filter) / sizeof(struct sock_filter);
+ if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)))
+ error(1, errno, "setsockopt filter");
+}
+
+static int create_ring(char **ring)
+{
+ struct fanout_args args = {
+ .id = 1,
+ .type_flags = PACKET_FANOUT_CPU,
+ .max_num_members = RSS_MAX_CPUS
+ };
+ struct sockaddr_ll ll = { 0 };
+ int fd, val;
+
+ fd = socket(PF_PACKET, SOCK_DGRAM, 0);
+ if (fd == -1)
+ error(1, errno, "socket creation failed");
+
+ val = TPACKET_V3;
+ if (setsockopt(fd, SOL_PACKET, PACKET_VERSION, &val, sizeof(val)))
+ error(1, errno, "setsockopt PACKET_VERSION");
+ *ring = setup_ring(fd);
+
+ /* block packets until all rings are added to the fanout group:
+ * else packets can arrive during setup and get misclassified
+ */
+ set_filter_null(fd);
+
+ ll.sll_family = AF_PACKET;
+ ll.sll_ifindex = if_nametoindex(cfg_ifname);
+ ll.sll_protocol = cfg_family == AF_INET ? htons(ETH_P_IP) :
+ htons(ETH_P_IPV6);
+ if (bind(fd, (void *)&ll, sizeof(ll)))
+ error(1, errno, "bind");
+
+ /* must come after bind: verifies all programs in group match */
+ if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &args, sizeof(args))) {
+ /* on failure, retry using old API if that is sufficient:
+ * it has a hard limit of 256 sockets, so only try if
+ * (a) only testing rxhash, not RSS or (b) <= 256 cpus.
+ * in this API, the third argument is left implicit.
+ */
+ if (cfg_num_queues || num_cpus > 256 ||
+ setsockopt(fd, SOL_PACKET, PACKET_FANOUT,
+ &args, sizeof(uint32_t)))
+ error(1, errno, "setsockopt PACKET_FANOUT cpu");
+ }
+
+ return fd;
+}
+
+/* setup inet(6) socket to blackhole the test traffic, if arg '-s' */
+static int setup_sink(void)
+{
+ int fd, val;
+
+ fd = socket(cfg_family, cfg_type, 0);
+ if (fd == -1)
+ error(1, errno, "socket %d.%d", cfg_family, cfg_type);
+
+ val = 1 << 20;
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVBUFFORCE, &val, sizeof(val)))
+ error(1, errno, "setsockopt rcvbuf");
+
+ return fd;
+}
+
+static void setup_rings(void)
+{
+ int i;
+
+ for (i = 0; i < num_cpus; i++) {
+ rings[i].cpu = i;
+ rings[i].fd = create_ring(&rings[i].mmap);
+ }
+
+ /* accept packets once all rings in the fanout group are up */
+ for (i = 0; i < num_cpus; i++)
+ set_filter(rings[i].fd);
+}
+
+static void cleanup_rings(void)
+{
+ int i;
+
+ for (i = 0; i < num_cpus; i++) {
+ if (munmap(rings[i].mmap, ring_block_nr * ring_block_sz))
+ error(1, errno, "munmap");
+ if (close(rings[i].fd))
+ error(1, errno, "close");
+ }
+}
+
+static void parse_cpulist(const char *arg)
+{
+ do {
+ rx_irq_cpus[cfg_num_queues++] = strtol(arg, NULL, 10);
+
+ arg = strchr(arg, ',');
+ if (!arg)
+ break;
+ arg++; // skip ','
+ } while (1);
+}
+
+static void show_cpulist(void)
+{
+ int i;
+
+ for (i = 0; i < cfg_num_queues; i++)
+ fprintf(stderr, "rxq %d: cpu %d\n", i, rx_irq_cpus[i]);
+}
+
+static void show_silos(void)
+{
+ int i;
+
+ for (i = 0; i < cfg_num_rps_cpus; i++)
+ fprintf(stderr, "silo %d: cpu %d\n", i, rps_silo_to_cpu[i]);
+}
+
+static void parse_toeplitz_key(const char *str, int slen, unsigned char *key)
+{
+ int i, ret, off;
+
+ if (slen < TOEPLITZ_STR_MIN_LEN ||
+ slen > TOEPLITZ_STR_MAX_LEN + 1)
+ error(1, 0, "invalid toeplitz key");
+
+ for (i = 0, off = 0; off < slen; i++, off += 3) {
+ ret = sscanf(str + off, "%hhx", &key[i]);
+ if (ret != 1)
+ error(1, 0, "key parse error at %d off %d len %d",
+ i, off, slen);
+ }
+}
+
+static void parse_rps_bitmap(const char *arg)
+{
+ unsigned long bitmap;
+ int i;
+
+ bitmap = strtoul(arg, NULL, 0);
+
+ if (bitmap & ~(RPS_MAX_CPUS - 1))
+ error(1, 0, "rps bitmap 0x%lx out of bounds 0..%lu",
+ bitmap, RPS_MAX_CPUS - 1);
+
+ for (i = 0; i < RPS_MAX_CPUS; i++)
+ if (bitmap & 1UL << i)
+ rps_silo_to_cpu[cfg_num_rps_cpus++] = i;
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ static struct option long_options[] = {
+ {"dport", required_argument, 0, 'd'},
+ {"cpus", required_argument, 0, 'C'},
+ {"key", required_argument, 0, 'k'},
+ {"iface", required_argument, 0, 'i'},
+ {"ipv4", no_argument, 0, '4'},
+ {"ipv6", no_argument, 0, '6'},
+ {"sink", no_argument, 0, 's'},
+ {"tcp", no_argument, 0, 't'},
+ {"timeout", required_argument, 0, 'T'},
+ {"udp", no_argument, 0, 'u'},
+ {"verbose", no_argument, 0, 'v'},
+ {"rps", required_argument, 0, 'r'},
+ {0, 0, 0, 0}
+ };
+ bool have_toeplitz = false;
+ int index, c;
+
+ while ((c = getopt_long(argc, argv, "46C:d:i:k:r:stT:u:v", long_options, &index)) != -1) {
+ switch (c) {
+ case '4':
+ cfg_family = AF_INET;
+ break;
+ case '6':
+ cfg_family = AF_INET6;
+ break;
+ case 'C':
+ parse_cpulist(optarg);
+ break;
+ case 'd':
+ cfg_dport = strtol(optarg, NULL, 0);
+ break;
+ case 'i':
+ cfg_ifname = optarg;
+ break;
+ case 'k':
+ parse_toeplitz_key(optarg, strlen(optarg),
+ toeplitz_key);
+ have_toeplitz = true;
+ break;
+ case 'r':
+ parse_rps_bitmap(optarg);
+ break;
+ case 's':
+ cfg_sink = true;
+ break;
+ case 't':
+ cfg_type = SOCK_STREAM;
+ break;
+ case 'T':
+ cfg_timeout_msec = strtol(optarg, NULL, 0);
+ break;
+ case 'u':
+ cfg_type = SOCK_DGRAM;
+ break;
+ case 'v':
+ cfg_verbose = true;
+ break;
+
+ default:
+ error(1, 0, "unknown option %c", optopt);
+ break;
+ }
+ }
+
+ if (!have_toeplitz)
+ error(1, 0, "Must supply rss key ('-k')");
+
+ num_cpus = get_nprocs();
+ if (num_cpus > RSS_MAX_CPUS)
+ error(1, 0, "increase RSS_MAX_CPUS");
+
+ if (cfg_num_queues && cfg_num_rps_cpus)
+ error(1, 0,
+ "Can't supply both RSS cpus ('-C') and RPS map ('-r')");
+ if (cfg_verbose) {
+ show_cpulist();
+ show_silos();
+ }
+}
+
+int main(int argc, char **argv)
+{
+ const int min_tests = 10;
+ int fd_sink = -1;
+
+ parse_opts(argc, argv);
+
+ if (cfg_sink)
+ fd_sink = setup_sink();
+
+ setup_rings();
+ process_rings();
+ cleanup_rings();
+
+ if (cfg_sink && close(fd_sink))
+ error(1, errno, "close sink");
+
+ if (frames_received - frames_nohash < min_tests)
+ error(1, 0, "too few frames for verification");
+
+ return frames_error;
+}
diff --git a/tools/testing/selftests/net/toeplitz.sh b/tools/testing/selftests/net/toeplitz.sh
new file mode 100755
index 000000000000..0a49907cd4fe
--- /dev/null
+++ b/tools/testing/selftests/net/toeplitz.sh
@@ -0,0 +1,199 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# extended toeplitz test: test rxhash plus, optionally, either (1) rss mapping
+# from rxhash to rx queue ('-rss') or (2) rps mapping from rxhash to cpu
+# ('-rps <rps_map>')
+#
+# irq-pattern-prefix can be derived from /sys/kernel/irq/*/action,
+# which is a driver-specific encoding.
+#
+# invoke as ./toeplitz.sh (-i <iface>) -u|-t -4|-6 \
+# [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)]
+
+source setup_loopback.sh
+readonly SERVER_IP4="192.168.1.200/24"
+readonly SERVER_IP6="fda8::1/64"
+readonly SERVER_MAC="aa:00:00:00:00:02"
+
+readonly CLIENT_IP4="192.168.1.100/24"
+readonly CLIENT_IP6="fda8::2/64"
+readonly CLIENT_MAC="aa:00:00:00:00:01"
+
+PORT=8000
+KEY="$(</proc/sys/net/core/netdev_rss_key)"
+TEST_RSS=false
+RPS_MAP=""
+PROTO_FLAG=""
+IP_FLAG=""
+DEV="eth0"
+
+# Return the number of rxqs among which RSS is configured to spread packets.
+# This is determined by reading the RSS indirection table using ethtool.
+get_rss_cfg_num_rxqs() {
+ echo $(ethtool -x "${DEV}" |
+ egrep [[:space:]]+[0-9]+:[[:space:]]+ |
+ cut -d: -f2- |
+ awk '{$1=$1};1' |
+ tr ' ' '\n' |
+ sort -u |
+ wc -l)
+}
+
+# Return a list of the receive irq handler cpus.
+# The list is ordered by the irqs, so first rxq-0 cpu, then rxq-1 cpu, etc.
+# Reads /sys/kernel/irq/ in order, so algorithm depends on
+# irq_{rxq-0} < irq_{rxq-1}, etc.
+get_rx_irq_cpus() {
+ CPUS=""
+ # sort so that irq 2 is read before irq 10
+ SORTED_IRQS=$(for i in /sys/kernel/irq/*; do echo $i; done | sort -V)
+ # Consider only as many queues as RSS actually uses. We assume that
+ # if RSS_CFG_NUM_RXQS=N, then RSS uses rxqs 0-(N-1).
+ RSS_CFG_NUM_RXQS=$(get_rss_cfg_num_rxqs)
+ RXQ_COUNT=0
+
+ for i in ${SORTED_IRQS}
+ do
+ [[ "${RXQ_COUNT}" -lt "${RSS_CFG_NUM_RXQS}" ]] || break
+ # lookup relevant IRQs by action name
+ [[ -e "$i/actions" ]] || continue
+ cat "$i/actions" | grep -q "${IRQ_PATTERN}" || continue
+ irqname=$(<"$i/actions")
+
+ # does the IRQ get called
+ irqcount=$(cat "$i/per_cpu_count" | tr -d '0,')
+ [[ -n "${irqcount}" ]] || continue
+
+ # lookup CPU
+ irq=$(basename "$i")
+ cpu=$(cat "/proc/irq/$irq/smp_affinity_list")
+
+ if [[ -z "${CPUS}" ]]; then
+ CPUS="${cpu}"
+ else
+ CPUS="${CPUS},${cpu}"
+ fi
+ RXQ_COUNT=$((RXQ_COUNT+1))
+ done
+
+ echo "${CPUS}"
+}
+
+get_disable_rfs_cmd() {
+ echo "echo 0 > /proc/sys/net/core/rps_sock_flow_entries;"
+}
+
+get_set_rps_bitmaps_cmd() {
+ CMD=""
+ for i in /sys/class/net/${DEV}/queues/rx-*/rps_cpus
+ do
+ CMD="${CMD} echo $1 > ${i};"
+ done
+
+ echo "${CMD}"
+}
+
+get_disable_rps_cmd() {
+ echo "$(get_set_rps_bitmaps_cmd 0)"
+}
+
+die() {
+ echo "$1"
+ exit 1
+}
+
+check_nic_rxhash_enabled() {
+ local -r pattern="receive-hashing:\ on"
+
+ ethtool -k "${DEV}" | grep -q "${pattern}" || die "rxhash must be enabled"
+}
+
+parse_opts() {
+ local prog=$0
+ shift 1
+
+ while [[ "$1" =~ "-" ]]; do
+ if [[ "$1" = "-irq_prefix" ]]; then
+ shift
+ IRQ_PATTERN="^$1-[0-9]*$"
+ elif [[ "$1" = "-u" || "$1" = "-t" ]]; then
+ PROTO_FLAG="$1"
+ elif [[ "$1" = "-4" ]]; then
+ IP_FLAG="$1"
+ SERVER_IP="${SERVER_IP4}"
+ CLIENT_IP="${CLIENT_IP4}"
+ elif [[ "$1" = "-6" ]]; then
+ IP_FLAG="$1"
+ SERVER_IP="${SERVER_IP6}"
+ CLIENT_IP="${CLIENT_IP6}"
+ elif [[ "$1" = "-rss" ]]; then
+ TEST_RSS=true
+ elif [[ "$1" = "-rps" ]]; then
+ shift
+ RPS_MAP="$1"
+ elif [[ "$1" = "-i" ]]; then
+ shift
+ DEV="$1"
+ else
+ die "Usage: ${prog} (-i <iface>) -u|-t -4|-6 \
+ [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)]"
+ fi
+ shift
+ done
+}
+
+setup() {
+ setup_loopback_environment "${DEV}"
+
+ # Set up server_ns namespace and client_ns namespace
+ setup_macvlan_ns "${DEV}" server_ns server \
+ "${SERVER_MAC}" "${SERVER_IP}"
+ setup_macvlan_ns "${DEV}" client_ns client \
+ "${CLIENT_MAC}" "${CLIENT_IP}"
+}
+
+cleanup() {
+ cleanup_macvlan_ns server_ns server client_ns client
+ cleanup_loopback "${DEV}"
+}
+
+parse_opts $0 $@
+
+setup
+trap cleanup EXIT
+
+check_nic_rxhash_enabled
+
+# Actual test starts here
+if [[ "${TEST_RSS}" = true ]]; then
+ # RPS/RFS must be disabled because they move packets between cpus,
+ # which breaks the PACKET_FANOUT_CPU identification of RSS decisions.
+ eval "$(get_disable_rfs_cmd) $(get_disable_rps_cmd)" \
+ ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
+ -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \
+ -C "$(get_rx_irq_cpus)" -s -v &
+elif [[ ! -z "${RPS_MAP}" ]]; then
+ eval "$(get_disable_rfs_cmd) $(get_set_rps_bitmaps_cmd ${RPS_MAP})" \
+ ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
+ -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \
+ -r "0x${RPS_MAP}" -s -v &
+else
+ ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
+ -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 -s -v &
+fi
+
+server_pid=$!
+
+ip netns exec client_ns ./toeplitz_client.sh "${PROTO_FLAG}" \
+ "${IP_FLAG}" "${SERVER_IP%%/*}" "${PORT}" &
+
+client_pid=$!
+
+wait "${server_pid}"
+exit_code=$?
+kill -9 "${client_pid}"
+if [[ "${exit_code}" -eq 0 ]]; then
+ echo "Test Succeeded!"
+fi
+exit "${exit_code}"
diff --git a/tools/testing/selftests/net/toeplitz_client.sh b/tools/testing/selftests/net/toeplitz_client.sh
new file mode 100755
index 000000000000..2fef34f4aba1
--- /dev/null
+++ b/tools/testing/selftests/net/toeplitz_client.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# A simple program for generating traffic for the toeplitz test.
+#
+# This program sends packets periodically for, conservatively, 20 seconds. The
+# intent is for the calling program to kill this program once it is no longer
+# needed, rather than waiting for the 20 second expiration.
+
+send_traffic() {
+ expiration=$((SECONDS+20))
+ while [[ "${SECONDS}" -lt "${expiration}" ]]
+ do
+ if [[ "${PROTO}" == "-u" ]]; then
+ echo "msg $i" | nc "${IPVER}" -u -w 0 "${ADDR}" "${PORT}"
+ else
+ echo "msg $i" | nc "${IPVER}" -w 0 "${ADDR}" "${PORT}"
+ fi
+ sleep 0.001
+ done
+}
+
+PROTO=$1
+IPVER=$2
+ADDR=$3
+PORT=$4
+
+send_traffic
diff --git a/tools/testing/selftests/net/unicast_extensions.sh b/tools/testing/selftests/net/unicast_extensions.sh
index 66354cdd5ce4..2d10ccac898a 100755
--- a/tools/testing/selftests/net/unicast_extensions.sh
+++ b/tools/testing/selftests/net/unicast_extensions.sh
@@ -28,12 +28,15 @@
# These tests provide an easy way to flip the expected result of any
# of these behaviors for testing kernel patches that change them.
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
# nettest can be run from PATH or from same directory as this selftest
if ! which nettest >/dev/null; then
PATH=$PWD:$PATH
if ! which nettest >/dev/null; then
echo "'nettest' command not found; skipping tests"
- exit 0
+ exit $ksft_skip
fi
fi
diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh
index 11d7cdb898c0..19eac3e44c06 100755
--- a/tools/testing/selftests/net/veth.sh
+++ b/tools/testing/selftests/net/veth.sh
@@ -13,7 +13,7 @@ readonly NS_DST=$BASE$DST
readonly BM_NET_V4=192.168.1.
readonly BM_NET_V6=2001:db8::
-readonly NPROCS=`nproc`
+readonly CPUS=`nproc`
ret=0
cleanup() {
@@ -75,6 +75,29 @@ chk_tso_flag() {
__chk_flag "$1" $2 $3 tcp-segmentation-offload
}
+chk_channels() {
+ local msg="$1"
+ local target=$2
+ local rx=$3
+ local tx=$4
+
+ local dev=veth$target
+
+ local cur_rx=`ip netns exec $BASE$target ethtool -l $dev |\
+ grep RX: | tail -n 1 | awk '{print $2}' `
+ local cur_tx=`ip netns exec $BASE$target ethtool -l $dev |\
+ grep TX: | tail -n 1 | awk '{print $2}'`
+ local cur_combined=`ip netns exec $BASE$target ethtool -l $dev |\
+ grep Combined: | tail -n 1 | awk '{print $2}'`
+
+ printf "%-60s" "$msg"
+ if [ "$cur_rx" = "$rx" -a "$cur_tx" = "$tx" -a "$cur_combined" = "n/a" ]; then
+ echo " ok "
+ else
+ echo " fail rx:$rx:$cur_rx tx:$tx:$cur_tx combined:n/a:$cur_combined"
+ fi
+}
+
chk_gro() {
local msg="$1"
local expected=$2
@@ -107,11 +130,100 @@ chk_gro() {
fi
}
+__change_channels()
+{
+ local cur_cpu
+ local end=$1
+ local cur
+ local i
+
+ while true; do
+ printf -v cur '%(%s)T'
+ [ $cur -le $end ] || break
+
+ for i in `seq 1 $CPUS`; do
+ ip netns exec $NS_SRC ethtool -L veth$SRC rx $i tx $i
+ ip netns exec $NS_DST ethtool -L veth$DST rx $i tx $i
+ done
+
+ for i in `seq 1 $((CPUS - 1))`; do
+ cur_cpu=$((CPUS - $i))
+ ip netns exec $NS_SRC ethtool -L veth$SRC rx $cur_cpu tx $cur_cpu
+ ip netns exec $NS_DST ethtool -L veth$DST rx $cur_cpu tx $cur_cpu
+ done
+ done
+}
+
+__send_data() {
+ local end=$1
+
+ while true; do
+ printf -v cur '%(%s)T'
+ [ $cur -le $end ] || break
+
+ ip netns exec $NS_SRC ./udpgso_bench_tx -4 -s 1000 -M 300 -D $BM_NET_V4$DST
+ done
+}
+
+do_stress() {
+ local end
+ printf -v end '%(%s)T'
+ end=$((end + $STRESS))
+
+ ip netns exec $NS_SRC ethtool -L veth$SRC rx 3 tx 3
+ ip netns exec $NS_DST ethtool -L veth$DST rx 3 tx 3
+
+ ip netns exec $NS_DST ./udpgso_bench_rx &
+ local rx_pid=$!
+
+ echo "Running stress test for $STRESS seconds..."
+ __change_channels $end &
+ local ch_pid=$!
+ __send_data $end &
+ local data_pid_1=$!
+ __send_data $end &
+ local data_pid_2=$!
+ __send_data $end &
+ local data_pid_3=$!
+ __send_data $end &
+ local data_pid_4=$!
+
+ wait $ch_pid $data_pid_1 $data_pid_2 $data_pid_3 $data_pid_4
+ kill -9 $rx_pid
+ echo "done"
+
+ # restore previous setting
+ ip netns exec $NS_SRC ethtool -L veth$SRC rx 2 tx 2
+ ip netns exec $NS_DST ethtool -L veth$DST rx 2 tx 1
+}
+
+usage() {
+ echo "Usage: $0 [-h] [-s <seconds>]"
+ echo -e "\t-h: show this help"
+ echo -e "\t-s: run optional stress tests for the given amount of seconds"
+}
+
+STRESS=0
+while getopts "hs:" option; do
+ case "$option" in
+ "h")
+ usage $0
+ exit 0
+ ;;
+ "s")
+ STRESS=$OPTARG
+ ;;
+ esac
+done
+
if [ ! -f ../bpf/xdp_dummy.o ]; then
echo "Missing xdp_dummy helper. Build bpf selftest first"
exit 1
fi
+[ $CPUS -lt 2 ] && echo "Only one CPU available, some tests will be skipped"
+[ $STRESS -gt 0 -a $CPUS -lt 3 ] && echo " stress test will be skipped, too"
+
create_ns
chk_gro_flag "default - gro flag" $SRC off
chk_gro_flag " - peer gro flag" $DST off
@@ -134,6 +246,8 @@ chk_gro " - aggregation with TSO off" 1
cleanup
create_ns
+chk_channels "default channels" $DST 1 1
+
ip -n $NS_DST link set dev veth$DST down
ip netns exec $NS_DST ethtool -K veth$DST gro on
chk_gro_flag "with gro enabled on link down - gro flag" $DST on
@@ -147,6 +261,56 @@ chk_gro " - aggregation with TSO off" 1
cleanup
create_ns
+
+CUR_TX=1
+CUR_RX=1
+if [ $CPUS -gt 1 ]; then
+ ip netns exec $NS_DST ethtool -L veth$DST tx 2
+ chk_channels "setting tx channels" $DST 1 2
+ CUR_TX=2
+fi
+
+if [ $CPUS -gt 2 ]; then
+ ip netns exec $NS_DST ethtool -L veth$DST rx 3 tx 3
+ chk_channels "setting both rx and tx channels" $DST 3 3
+ CUR_RX=3
+ CUR_TX=3
+fi
+
+ip netns exec $NS_DST ethtool -L veth$DST combined 2 2>/dev/null
+chk_channels "bad setting: combined channels" $DST $CUR_RX $CUR_TX
+
+ip netns exec $NS_DST ethtool -L veth$DST tx $((CPUS + 1)) 2>/dev/null
+chk_channels "setting invalid channels nr" $DST $CUR_RX $CUR_TX
+
+if [ $CPUS -gt 1 ]; then
+ # this also tests queues nr reduction
+ ip netns exec $NS_DST ethtool -L veth$DST rx 1 tx 2 2>/dev/null
+ ip netns exec $NS_SRC ethtool -L veth$SRC rx 1 tx 2 2>/dev/null
+ printf "%-60s" "bad setting: XDP with RX nr less than TX"
+ ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o \
+ section xdp_dummy 2>/dev/null &&\
+ echo "fail - set operation successful ?!?" || echo " ok "
+
+ # the following tests will run with multiple channels active
+ ip netns exec $NS_SRC ethtool -L veth$SRC rx 2
+ ip netns exec $NS_DST ethtool -L veth$DST rx 2
+ ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o \
+ section xdp_dummy 2>/dev/null
+ printf "%-60s" "bad setting: reducing RX nr below peer TX with XDP set"
+ ip netns exec $NS_DST ethtool -L veth$DST rx 1 2>/dev/null &&\
+ echo "fail - set operation successful ?!?" || echo " ok "
+ CUR_RX=2
+ CUR_TX=2
+fi
+
+if [ $CPUS -gt 2 ]; then
+ printf "%-60s" "bad setting: increasing peer TX nr above RX with XDP set"
+ ip netns exec $NS_SRC ethtool -L veth$SRC tx 3 2>/dev/null &&\
+ echo "fail - set operation successful ?!?" || echo " ok "
+ chk_channels "setting invalid channels nr" $DST 2 2
+fi
+
ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o section xdp_dummy 2>/dev/null
chk_gro_flag "with xdp attached - gro flag" $DST on
chk_gro_flag " - peer gro flag" $SRC off
@@ -167,10 +331,27 @@ chk_gro_flag " - after gro on xdp off, gro flag" $DST on
chk_gro_flag " - peer gro flag" $SRC off
chk_tso_flag " - tso flag" $SRC on
chk_tso_flag " - peer tso flag" $DST on
+
+if [ $CPUS -gt 1 ]; then
+ ip netns exec $NS_DST ethtool -L veth$DST tx 1
+ chk_channels "decreasing tx channels with device down" $DST 2 1
+fi
+
ip -n $NS_DST link set dev veth$DST up
ip -n $NS_SRC link set dev veth$SRC up
chk_gro " - aggregation" 1
+if [ $CPUS -gt 1 ]; then
+ [ $STRESS -gt 0 -a $CPUS -gt 2 ] && do_stress
+
+ ip -n $NS_DST link set dev veth$DST down
+ ip -n $NS_SRC link set dev veth$SRC down
+ ip netns exec $NS_DST ethtool -L veth$DST tx 2
+ chk_channels "increasing tx channels with device down" $DST 2 2
+ ip -n $NS_DST link set dev veth$DST up
+ ip -n $NS_SRC link set dev veth$SRC up
+fi
+
ip netns exec $NS_DST ethtool -K veth$DST gro off
ip netns exec $NS_SRC ethtool -K veth$SRC tx-udp-segmentation off
chk_gro "aggregation again with default and TSO off" 10
diff --git a/tools/testing/selftests/net/vrf_strict_mode_test.sh b/tools/testing/selftests/net/vrf_strict_mode_test.sh
index 18b982d611de..865d53c1781c 100755
--- a/tools/testing/selftests/net/vrf_strict_mode_test.sh
+++ b/tools/testing/selftests/net/vrf_strict_mode_test.sh
@@ -3,6 +3,9 @@
# This test is designed for testing the new VRF strict_mode functionality.
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
ret=0
# identifies the "init" network namespace which is often called root network
@@ -371,18 +374,18 @@ vrf_strict_mode_check_support()
if [ "$(id -u)" -ne 0 ];then
echo "SKIP: Need root privileges"
- exit 0
+ exit $ksft_skip
fi
if [ ! -x "$(command -v ip)" ]; then
echo "SKIP: Could not run test without ip tool"
- exit 0
+ exit $ksft_skip
fi
modprobe vrf &>/dev/null
if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
echo "SKIP: vrf sysctl does not exist"
- exit 0
+ exit $ksft_skip
fi
cleanup &> /dev/null