diff options
Diffstat (limited to 'tools/testing/selftests/net/mptcp')
-rw-r--r-- | tools/testing/selftests/net/mptcp/.gitignore | 2 | ||||
-rw-r--r-- | tools/testing/selftests/net/mptcp/Makefile | 10 | ||||
-rw-r--r-- | tools/testing/selftests/net/mptcp/config | 30 | ||||
-rwxr-xr-x | tools/testing/selftests/net/mptcp/diag.sh | 361 | ||||
-rw-r--r-- | tools/testing/selftests/net/mptcp/mptcp_connect.c | 860 | ||||
-rwxr-xr-x | tools/testing/selftests/net/mptcp/mptcp_connect.sh | 718 | ||||
-rw-r--r-- | tools/testing/selftests/net/mptcp/mptcp_inq.c | 599 | ||||
-rwxr-xr-x | tools/testing/selftests/net/mptcp/mptcp_join.sh | 3704 | ||||
-rw-r--r-- | tools/testing/selftests/net/mptcp/mptcp_lib.sh | 668 | ||||
-rw-r--r-- | tools/testing/selftests/net/mptcp/mptcp_sockopt.c | 866 | ||||
-rwxr-xr-x | tools/testing/selftests/net/mptcp/mptcp_sockopt.sh | 360 | ||||
-rwxr-xr-x | tools/testing/selftests/net/mptcp/pm_netlink.sh | 300 | ||||
-rw-r--r-- | tools/testing/selftests/net/mptcp/pm_nl_ctl.c | 989 | ||||
-rw-r--r-- | tools/testing/selftests/net/mptcp/settings | 2 | ||||
-rwxr-xr-x | tools/testing/selftests/net/mptcp/simult_flows.sh | 298 | ||||
-rwxr-xr-x | tools/testing/selftests/net/mptcp/userspace_pm.sh | 897 |
16 files changed, 10038 insertions, 626 deletions
diff --git a/tools/testing/selftests/net/mptcp/.gitignore b/tools/testing/selftests/net/mptcp/.gitignore index 260336d5f0b1..49daae73c41e 100644 --- a/tools/testing/selftests/net/mptcp/.gitignore +++ b/tools/testing/selftests/net/mptcp/.gitignore @@ -1,4 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only mptcp_connect +mptcp_inq +mptcp_sockopt pm_nl_ctl *.pcap diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile index f50976ee7d44..7b936a926859 100644 --- a/tools/testing/selftests/net/mptcp/Makefile +++ b/tools/testing/selftests/net/mptcp/Makefile @@ -1,15 +1,15 @@ # SPDX-License-Identifier: GPL-2.0 top_srcdir = ../../../../.. -KSFT_KHDR_INSTALL := 1 -CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include +CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES) -TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh +TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \ + simult_flows.sh mptcp_sockopt.sh userspace_pm.sh -TEST_GEN_FILES = mptcp_connect pm_nl_ctl +TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq -TEST_FILES := settings +TEST_FILES := mptcp_lib.sh settings EXTRA_CLEAN := *.pcap diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index 2499824d9e1c..4f80014cae49 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -1,4 +1,34 @@ +CONFIG_KALLSYMS=y CONFIG_MPTCP=y +CONFIG_IPV6=y CONFIG_MPTCP_IPV6=y +CONFIG_INET_DIAG=m +CONFIG_INET_MPTCP_DIAG=m CONFIG_VETH=y CONFIG_NET_SCH_NETEM=m +CONFIG_SYN_COOKIES=y +CONFIG_NETFILTER=y +CONFIG_NETFILTER_ADVANCED=y +CONFIG_NETFILTER_NETLINK=m +CONFIG_NF_TABLES=m +CONFIG_NFT_COMPAT=m +CONFIG_NETFILTER_XTABLES=m +CONFIG_NETFILTER_XT_MATCH_BPF=m +CONFIG_NETFILTER_XT_MATCH_LENGTH=m +CONFIG_NETFILTER_XT_MATCH_STATISTIC=m +CONFIG_NETFILTER_XT_TARGET_MARK=m +CONFIG_NF_TABLES_INET=y +CONFIG_NFT_TPROXY=m 
+CONFIG_NFT_SOCKET=m +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_NF_FILTER=m +CONFIG_IP_NF_MANGLE=m +CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IP6_NF_FILTER=m +CONFIG_NET_ACT_CSUM=m +CONFIG_NET_ACT_PEDIT=m +CONFIG_NET_CLS_ACT=y +CONFIG_NET_CLS_FW=m +CONFIG_NET_SCH_INGRESS=m diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh new file mode 100755 index 000000000000..776d43a6922d --- /dev/null +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -0,0 +1,361 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Double quotes to prevent globbing and word splitting is recommended in new +# code but we accept it, especially because there were too many before having +# address all other issues detected by shellcheck. +#shellcheck disable=SC2086 + +. "$(dirname "${0}")/mptcp_lib.sh" + +ns="" +timeout_poll=30 +timeout_test=$((timeout_poll * 2 + 1)) +ret=0 + +flush_pids() +{ + # mptcp_connect in join mode will sleep a bit before completing, + # give it some time + sleep 1.1 + + ip netns pids "${ns}" | xargs --no-run-if-empty kill -SIGUSR1 &>/dev/null + + for _ in $(seq $((timeout_poll * 10))); do + [ -z "$(ip netns pids "${ns}")" ] && break + sleep 0.1 + done +} + +# This function is used in the cleanup trap +#shellcheck disable=SC2317 +cleanup() +{ + ip netns pids "${ns}" | xargs --no-run-if-empty kill -SIGKILL &>/dev/null + + mptcp_lib_ns_exit "${ns}" +} + +mptcp_lib_check_mptcp +mptcp_lib_check_tools ip ss + +get_msk_inuse() +{ + ip netns exec $ns cat /proc/net/protocols | awk '$1~/^MPTCP$/{print $3}' +} + +__chk_nr() +{ + local command="$1" + local expected=$2 + local msg="$3" + local skip="${4-SKIP}" + local nr + + nr=$(eval $command) + + mptcp_lib_print_title "$msg" + if [ "$nr" != "$expected" ]; then + if [ "$nr" = "$skip" ] && ! 
mptcp_lib_expect_all_features; then + mptcp_lib_pr_skip "Feature probably not supported" + mptcp_lib_result_skip "${msg}" + else + mptcp_lib_pr_fail "expected $expected found $nr" + mptcp_lib_result_fail "${msg}" + ret=${KSFT_FAIL} + fi + else + mptcp_lib_pr_ok + mptcp_lib_result_pass "${msg}" + fi +} + +__chk_msk_nr() +{ + local condition=$1 + shift 1 + + __chk_nr "ss -inmHMN $ns | $condition" "$@" +} + +chk_msk_nr() +{ + __chk_msk_nr "grep -c token:" "$@" +} + +chk_listener_nr() +{ + local expected=$1 + local msg="$2" + + __chk_nr "ss -nlHMON $ns | wc -l" "$expected" "$msg - mptcp" 0 + __chk_nr "ss -nlHtON $ns | wc -l" "$expected" "$msg - subflows" +} + +wait_msk_nr() +{ + local condition="grep -c token:" + local expected=$1 + local timeout=20 + local msg nr + local max=0 + local i=0 + + shift 1 + msg=$* + + while [ $i -lt $timeout ]; do + nr=$(ss -inmHMN $ns | $condition) + [ $nr == $expected ] && break; + [ $nr -gt $max ] && max=$nr + i=$((i + 1)) + sleep 1 + done + + mptcp_lib_print_title "$msg" + if [ $i -ge $timeout ]; then + mptcp_lib_pr_fail "timeout while expecting $expected max $max last $nr" + mptcp_lib_result_fail "${msg} # timeout" + ret=${KSFT_FAIL} + elif [ $nr != $expected ]; then + mptcp_lib_pr_fail "expected $expected found $nr" + mptcp_lib_result_fail "${msg} # unexpected result" + ret=${KSFT_FAIL} + else + mptcp_lib_pr_ok + mptcp_lib_result_pass "${msg}" + fi +} + +chk_msk_fallback_nr() +{ + __chk_msk_nr "grep -c fallback" "$@" +} + +chk_msk_remote_key_nr() +{ + __chk_msk_nr "grep -c remote_key" "$@" +} + +__chk_listen() +{ + local filter="$1" + local expected=$2 + local msg="$3" + + __chk_nr "ss -N $ns -Ml '$filter' | grep -c LISTEN" "$expected" "$msg" 0 +} + +chk_msk_listen() +{ + lport=$1 + + # destination port search should always return empty list + __chk_listen "dport $lport" 0 "listen match for dport $lport" + + # should return 'our' mptcp listen socket + __chk_listen "sport $lport" 1 "listen match for sport $lport" + + __chk_listen "src 
inet:0.0.0.0:$lport" 1 "listen match for saddr and sport" + + __chk_listen "" 1 "all listen sockets" + + nr=$(ss -Ml $filter | wc -l) +} + +chk_msk_inuse() +{ + local expected=$1 + local msg="....chk ${2:-${expected}} msk in use" + local listen_nr + + if [ "${expected}" -eq 0 ]; then + msg+=" after flush" + fi + + listen_nr=$(ss -N "${ns}" -Ml | grep -c LISTEN) + expected=$((expected + listen_nr)) + + for _ in $(seq 10); do + if [ "$(get_msk_inuse)" -eq $expected ]; then + break + fi + sleep 0.1 + done + + __chk_nr get_msk_inuse $expected "${msg}" 0 +} + +# $1: cestab nr +chk_msk_cestab() +{ + local expected=$1 + local msg="....chk ${2:-${expected}} cestab" + + if [ "${expected}" -eq 0 ]; then + msg+=" after flush" + fi + + __chk_nr "mptcp_lib_get_counter ${ns} MPTcpExtMPCurrEstab" \ + "${expected}" "${msg}" "" +} + +msk_info_get_value() +{ + local port="${1}" + local info="${2}" + + ss -N "${ns}" -inHM dport "${port}" | \ + mptcp_lib_get_info_value "${info}" "${info}" +} + +chk_msk_info() +{ + local port="${1}" + local info="${2}" + local cnt="${3}" + local msg="....chk ${info}" + local delta_ms=250 # half what we waited before, just to be sure + local now + + now=$(msk_info_get_value "${port}" "${info}") + + mptcp_lib_print_title "${msg}" + if { [ -z "${cnt}" ] || [ -z "${now}" ]; } && + ! 
mptcp_lib_expect_all_features; then + mptcp_lib_pr_skip "Feature probably not supported" + mptcp_lib_result_skip "${msg}" + elif [ "$((cnt + delta_ms))" -lt "${now}" ]; then + mptcp_lib_pr_ok + mptcp_lib_result_pass "${msg}" + else + mptcp_lib_pr_fail "value of ${info} changed by $((now - cnt))ms," \ + "expected at least ${delta_ms}ms" + mptcp_lib_result_fail "${msg}" + ret=${KSFT_FAIL} + fi +} + +chk_last_time_info() +{ + local port="${1}" + local data_sent data_recv ack_recv + + data_sent=$(msk_info_get_value "${port}" "last_data_sent") + data_recv=$(msk_info_get_value "${port}" "last_data_recv") + ack_recv=$(msk_info_get_value "${port}" "last_ack_recv") + + sleep 0.5 # wait to check after if the timestamps difference + + chk_msk_info "${port}" "last_data_sent" "${data_sent}" + chk_msk_info "${port}" "last_data_recv" "${data_recv}" + chk_msk_info "${port}" "last_ack_recv" "${ack_recv}" +} + +wait_connected() +{ + local listener_ns="${1}" + local port="${2}" + + local port_hex i + + port_hex="$(printf "%04X" "${port}")" + for i in $(seq 10); do + ip netns exec ${listener_ns} grep -q " 0100007F:${port_hex} " /proc/net/tcp && break + sleep 0.1 + done +} + +trap cleanup EXIT +mptcp_lib_ns_init ns + +echo "a" | \ + timeout ${timeout_test} \ + ip netns exec $ns \ + ./mptcp_connect -p 10000 -l -t ${timeout_poll} -w 20 \ + 0.0.0.0 >/dev/null & +mptcp_lib_wait_local_port_listen $ns 10000 +chk_msk_nr 0 "no msk on netns creation" +chk_msk_listen 10000 + +echo "b" | \ + timeout ${timeout_test} \ + ip netns exec $ns \ + ./mptcp_connect -p 10000 -r 0 -t ${timeout_poll} -w 20 \ + 127.0.0.1 >/dev/null & +wait_connected $ns 10000 +chk_msk_nr 2 "after MPC handshake " +chk_last_time_info 10000 +chk_msk_remote_key_nr 2 "....chk remote_key" +chk_msk_fallback_nr 0 "....chk no fallback" +chk_msk_inuse 2 +chk_msk_cestab 2 +flush_pids + +chk_msk_inuse 0 "2->0" +chk_msk_cestab 0 "2->0" + +echo "a" | \ + timeout ${timeout_test} \ + ip netns exec $ns \ + ./mptcp_connect -p 10001 -l -s TCP 
-t ${timeout_poll} -w 20 \ + 0.0.0.0 >/dev/null & +mptcp_lib_wait_local_port_listen $ns 10001 +echo "b" | \ + timeout ${timeout_test} \ + ip netns exec $ns \ + ./mptcp_connect -p 10001 -r 0 -t ${timeout_poll} -w 20 \ + 127.0.0.1 >/dev/null & +wait_connected $ns 10001 +chk_msk_fallback_nr 1 "check fallback" +chk_msk_inuse 1 +chk_msk_cestab 1 +flush_pids + +chk_msk_inuse 0 "1->0" +chk_msk_cestab 0 "1->0" + +NR_CLIENTS=100 +for I in $(seq 1 $NR_CLIENTS); do + echo "a" | \ + timeout ${timeout_test} \ + ip netns exec $ns \ + ./mptcp_connect -p $((I+10001)) -l -w 20 \ + -t ${timeout_poll} 0.0.0.0 >/dev/null & +done +mptcp_lib_wait_local_port_listen $ns $((NR_CLIENTS + 10001)) + +for I in $(seq 1 $NR_CLIENTS); do + echo "b" | \ + timeout ${timeout_test} \ + ip netns exec $ns \ + ./mptcp_connect -p $((I+10001)) -w 20 \ + -t ${timeout_poll} 127.0.0.1 >/dev/null & +done + +wait_msk_nr $((NR_CLIENTS*2)) "many msk socket present" +chk_msk_inuse $((NR_CLIENTS*2)) "many" +chk_msk_cestab $((NR_CLIENTS*2)) "many" +flush_pids + +chk_msk_inuse 0 "many->0" +chk_msk_cestab 0 "many->0" + +chk_listener_nr 0 "no listener sockets" +NR_SERVERS=100 +for I in $(seq 1 $NR_SERVERS); do + ip netns exec $ns ./mptcp_connect -p $((I + 20001)) \ + -t ${timeout_poll} -l 0.0.0.0 >/dev/null 2>&1 & +done +mptcp_lib_wait_local_port_listen $ns $((NR_SERVERS + 20001)) + +chk_listener_nr $NR_SERVERS "many listener sockets" + +# graceful termination +for I in $(seq 1 $NR_SERVERS); do + echo a | ip netns exec $ns ./mptcp_connect -p $((I + 20001)) 127.0.0.1 >/dev/null 2>&1 & +done +flush_pids + +mptcp_lib_result_print_all_tap +exit $ret diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index cedee5b952ba..d2043ec3bf6d 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -6,14 +6,19 @@ #include <limits.h> #include <fcntl.h> #include <string.h> +#include <stdarg.h> #include 
<stdbool.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <strings.h> +#include <signal.h> #include <unistd.h> +#include <time.h> +#include <sys/ioctl.h> #include <sys/poll.h> +#include <sys/random.h> #include <sys/sendfile.h> #include <sys/stat.h> #include <sys/socket.h> @@ -24,6 +29,8 @@ #include <netinet/in.h> #include <linux/tcp.h> +#include <linux/time_types.h> +#include <linux/sockios.h> extern int optind; @@ -36,6 +43,7 @@ extern int optind; static int poll_timeout = 10 * 1000; static bool listen_mode; +static bool quit; enum cfg_mode { CFG_MODE_POLL, @@ -43,31 +51,108 @@ enum cfg_mode { CFG_MODE_SENDFILE, }; +enum cfg_peek { + CFG_NONE_PEEK, + CFG_WITH_PEEK, + CFG_AFTER_PEEK, +}; + static enum cfg_mode cfg_mode = CFG_MODE_POLL; +static enum cfg_peek cfg_peek = CFG_NONE_PEEK; static const char *cfg_host; static const char *cfg_port = "12000"; static int cfg_sock_proto = IPPROTO_MPTCP; -static bool tcpulp_audit; static int pf = AF_INET; static int cfg_sndbuf; static int cfg_rcvbuf; static bool cfg_join; +static bool cfg_remove; +static unsigned int cfg_time; +static unsigned int cfg_do_w; +static int cfg_wait; +static uint32_t cfg_mark; +static char *cfg_input; +static int cfg_repeat = 1; +static int cfg_truncate; +static int cfg_rcv_trunc; + +struct cfg_cmsg_types { + unsigned int cmsg_enabled:1; + unsigned int timestampns:1; + unsigned int tcp_inq:1; +}; + +struct cfg_sockopt_types { + unsigned int transparent:1; + unsigned int mptfo:1; +}; + +struct tcp_inq_state { + unsigned int last; + bool expect_eof; +}; + +struct wstate { + char buf[8192]; + unsigned int len; + unsigned int off; + unsigned int total_len; +}; + +static struct tcp_inq_state tcp_inq; + +static struct cfg_cmsg_types cfg_cmsg_types; +static struct cfg_sockopt_types cfg_sockopt_types; static void die_usage(void) { - fprintf(stderr, "Usage: mptcp_connect [-6] [-u] [-s MPTCP|TCP] [-p port] [-m mode]" - "[-l] connect_address\n"); + fprintf(stderr, "Usage: mptcp_connect 
[-6] [-c cmsg] [-f offset] [-i file] [-I num] [-j] [-l] " + "[-m mode] [-M mark] [-o option] [-p port] [-P mode] [-r num] [-R num] " + "[-s MPTCP|TCP] [-S num] [-t num] [-T num] [-w sec] connect_address\n"); fprintf(stderr, "\t-6 use ipv6\n"); - fprintf(stderr, "\t-t num -- set poll timeout to num\n"); - fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n"); - fprintf(stderr, "\t-R num -- set SO_RCVBUF to num\n"); + fprintf(stderr, "\t-c cmsg -- test cmsg type <cmsg>\n"); + fprintf(stderr, "\t-f offset -- stop the I/O after receiving and sending the specified amount " + "of bytes. If there are unread bytes in the receive queue, that will cause a MPTCP " + "fastclose at close/shutdown. If offset is negative, expect the peer to close before " + "all the local data as been sent, thus toleration errors on write and EPIPE signals\n"); + fprintf(stderr, "\t-i file -- read the data to send from the given file instead of stdin"); + fprintf(stderr, "\t-I num -- repeat the transfer 'num' times. In listen mode accepts num " + "incoming connections, in client mode, disconnect and reconnect to the server\n"); + fprintf(stderr, "\t-j -- add additional sleep at connection start and tear down " + "-- for MPJ tests\n"); + fprintf(stderr, "\t-l -- listens mode, accepts incoming connection\n"); + fprintf(stderr, "\t-m [poll|mmap|sendfile] -- use poll(default)/mmap+write/sendfile\n"); + fprintf(stderr, "\t-M mark -- set socket packet mark\n"); + fprintf(stderr, "\t-o option -- test sockopt <option>\n"); fprintf(stderr, "\t-p num -- use port num\n"); - fprintf(stderr, "\t-m [MPTCP|TCP] -- use tcp or mptcp sockets\n"); - fprintf(stderr, "\t-s [mmap|poll] -- use poll (default) or mmap\n"); - fprintf(stderr, "\t-u -- check mptcp ulp\n"); + fprintf(stderr, + "\t-P [saveWithPeek|saveAfterPeek] -- save data with/after MSG_PEEK form tcp socket\n"); + fprintf(stderr, "\t-r num -- enable slow mode, limiting each write to num bytes " + "-- for remove addr tests\n"); + fprintf(stderr, "\t-R num 
-- set SO_RCVBUF to num\n"); + fprintf(stderr, "\t-s [MPTCP|TCP] -- use mptcp(default) or tcp sockets\n"); + fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n"); + fprintf(stderr, "\t-t num -- set poll timeout to num\n"); + fprintf(stderr, "\t-T num -- set expected runtime to num ms\n"); + fprintf(stderr, "\t-w num -- wait num sec before closing the socket\n"); exit(1); } +static void xerror(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + exit(1); +} + +static void handle_signal(int nr) +{ + quit = true; +} + static const char *getxinfo_strerr(int err) { if (err == EAI_SYSTEM) @@ -129,10 +214,81 @@ static void set_sndbuf(int fd, unsigned int size) } } +static void set_mark(int fd, uint32_t mark) +{ + int err; + + err = setsockopt(fd, SOL_SOCKET, SO_MARK, &mark, sizeof(mark)); + if (err) { + perror("set SO_MARK"); + exit(1); + } +} + +static void set_transparent(int fd, int pf) +{ + int one = 1; + + switch (pf) { + case AF_INET: + if (-1 == setsockopt(fd, SOL_IP, IP_TRANSPARENT, &one, sizeof(one))) + perror("IP_TRANSPARENT"); + break; + case AF_INET6: + if (-1 == setsockopt(fd, IPPROTO_IPV6, IPV6_TRANSPARENT, &one, sizeof(one))) + perror("IPV6_TRANSPARENT"); + break; + } +} + +static void set_mptfo(int fd, int pf) +{ + int qlen = 25; + + if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)) == -1) + perror("TCP_FASTOPEN"); +} + +static int do_ulp_so(int sock, const char *name) +{ + return setsockopt(sock, IPPROTO_TCP, TCP_ULP, name, strlen(name)); +} + +#define X(m) xerror("%s:%u: %s: failed for proto %d at line %u", __FILE__, __LINE__, (m), proto, line) +static void sock_test_tcpulp(int sock, int proto, unsigned int line) +{ + socklen_t buflen = 8; + char buf[8] = ""; + int ret = getsockopt(sock, IPPROTO_TCP, TCP_ULP, buf, &buflen); + + if (ret != 0) + X("getsockopt"); + + if (buflen > 0) { + if (strcmp(buf, "mptcp") != 0) + xerror("unexpected ULP '%s' for proto %d at line %u", buf, proto, 
line); + ret = do_ulp_so(sock, "tls"); + if (ret == 0) + X("setsockopt"); + } else if (proto == IPPROTO_MPTCP) { + ret = do_ulp_so(sock, "tls"); + if (ret != -1) + X("setsockopt"); + } + + ret = do_ulp_so(sock, "mptcp"); + if (ret != -1) + X("setsockopt"); + +#undef X +} + +#define SOCK_TEST_TCPULP(s, p) sock_test_tcpulp((s), (p), __LINE__) + static int sock_listen_mptcp(const char * const listenaddr, const char * const port) { - int sock; + int sock = -1; struct addrinfo hints = { .ai_protocol = IPPROTO_TCP, .ai_socktype = SOCK_STREAM, @@ -152,10 +308,18 @@ static int sock_listen_mptcp(const char * const listenaddr, if (sock < 0) continue; + SOCK_TEST_TCPULP(sock, cfg_sock_proto); + if (-1 == setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one))) perror("setsockopt"); + if (cfg_sockopt_types.transparent) + set_transparent(sock, pf); + + if (cfg_sockopt_types.mptfo) + set_mptfo(sock, pf); + if (bind(sock, a->ai_addr, a->ai_addrlen) == 0) break; /* success */ @@ -171,60 +335,30 @@ static int sock_listen_mptcp(const char * const listenaddr, return sock; } + SOCK_TEST_TCPULP(sock, cfg_sock_proto); + if (listen(sock, 20)) { perror("listen"); close(sock); return -1; } - return sock; -} + SOCK_TEST_TCPULP(sock, cfg_sock_proto); -static bool sock_test_tcpulp(const char * const remoteaddr, - const char * const port) -{ - struct addrinfo hints = { - .ai_protocol = IPPROTO_TCP, - .ai_socktype = SOCK_STREAM, - }; - struct addrinfo *a, *addr; - int sock = -1, ret = 0; - bool test_pass = false; - - hints.ai_family = AF_INET; - - xgetaddrinfo(remoteaddr, port, &hints, &addr); - for (a = addr; a; a = a->ai_next) { - sock = socket(a->ai_family, a->ai_socktype, IPPROTO_TCP); - if (sock < 0) { - perror("socket"); - continue; - } - ret = setsockopt(sock, IPPROTO_TCP, TCP_ULP, "mptcp", - sizeof("mptcp")); - if (ret == -1 && errno == EOPNOTSUPP) - test_pass = true; - close(sock); - - if (test_pass) - break; - if (!ret) - fprintf(stderr, - "setsockopt(TCP_ULP) returned 0\n"); - 
else - perror("setsockopt(TCP_ULP)"); - } - return test_pass; + return sock; } static int sock_connect_mptcp(const char * const remoteaddr, - const char * const port, int proto) + const char * const port, int proto, + struct addrinfo **peer, + int infd, struct wstate *winfo) { struct addrinfo hints = { .ai_protocol = IPPROTO_TCP, .ai_socktype = SOCK_STREAM, }; struct addrinfo *a, *addr; + int syn_copied = 0; int sock = -1; hints.ai_family = pf; @@ -237,15 +371,44 @@ static int sock_connect_mptcp(const char * const remoteaddr, continue; } - if (connect(sock, a->ai_addr, a->ai_addrlen) == 0) - break; /* success */ + SOCK_TEST_TCPULP(sock, proto); - perror("connect()"); - close(sock); - sock = -1; + if (cfg_mark) + set_mark(sock, cfg_mark); + + if (cfg_sockopt_types.mptfo) { + if (!winfo->total_len) + winfo->total_len = winfo->len = read(infd, winfo->buf, + sizeof(winfo->buf)); + + syn_copied = sendto(sock, winfo->buf, winfo->len, MSG_FASTOPEN, + a->ai_addr, a->ai_addrlen); + if (syn_copied >= 0) { + winfo->off = syn_copied; + winfo->len -= syn_copied; + *peer = a; + break; /* success */ + } + } else { + if (connect(sock, a->ai_addr, a->ai_addrlen) == 0) { + *peer = a; + break; /* success */ + } + } + if (cfg_sockopt_types.mptfo) { + perror("sendto()"); + close(sock); + sock = -1; + } else { + perror("connect()"); + close(sock); + sock = -1; + } } freeaddrinfo(addr); + if (sock != -1) + SOCK_TEST_TCPULP(sock, proto); return sock; } @@ -262,9 +425,12 @@ static size_t do_rnd_write(const int fd, char *buf, const size_t len) if (cfg_join && first && do_w > 100) do_w = 100; + if (cfg_remove && do_w > cfg_do_w) + do_w = cfg_do_w; + bw = write(fd, buf, do_w); if (bw < 0) - perror("write"); + return bw; /* let the join handshake complete, before going on */ if (cfg_join && first) { @@ -272,6 +438,9 @@ static size_t do_rnd_write(const int fd, char *buf, const size_t len) first = false; } + if (cfg_remove) + usleep(200000); + return bw; } @@ -296,8 +465,105 @@ static size_t 
do_write(const int fd, char *buf, const size_t len) return offset; } +static void process_cmsg(struct msghdr *msgh) +{ + struct __kernel_timespec ts; + bool inq_found = false; + bool ts_found = false; + unsigned int inq = 0; + struct cmsghdr *cmsg; + + for (cmsg = CMSG_FIRSTHDR(msgh); cmsg ; cmsg = CMSG_NXTHDR(msgh, cmsg)) { + if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SO_TIMESTAMPNS_NEW) { + memcpy(&ts, CMSG_DATA(cmsg), sizeof(ts)); + ts_found = true; + continue; + } + if (cmsg->cmsg_level == IPPROTO_TCP && cmsg->cmsg_type == TCP_CM_INQ) { + memcpy(&inq, CMSG_DATA(cmsg), sizeof(inq)); + inq_found = true; + continue; + } + + } + + if (cfg_cmsg_types.timestampns) { + if (!ts_found) + xerror("TIMESTAMPNS not present\n"); + } + + if (cfg_cmsg_types.tcp_inq) { + if (!inq_found) + xerror("TCP_INQ not present\n"); + + if (inq > 1024) + xerror("tcp_inq %u is larger than one kbyte\n", inq); + tcp_inq.last = inq; + } +} + +static ssize_t do_recvmsg_cmsg(const int fd, char *buf, const size_t len) +{ + char msg_buf[8192]; + struct iovec iov = { + .iov_base = buf, + .iov_len = len, + }; + struct msghdr msg = { + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_control = msg_buf, + .msg_controllen = sizeof(msg_buf), + }; + int flags = 0; + unsigned int last_hint = tcp_inq.last; + int ret = recvmsg(fd, &msg, flags); + + if (ret <= 0) { + if (ret == 0 && tcp_inq.expect_eof) + return ret; + + if (ret == 0 && cfg_cmsg_types.tcp_inq) + if (last_hint != 1 && last_hint != 0) + xerror("EOF but last tcp_inq hint was %u\n", last_hint); + + return ret; + } + + if (tcp_inq.expect_eof) + xerror("expected EOF, last_hint %u, now %u\n", + last_hint, tcp_inq.last); + + if (msg.msg_controllen && !cfg_cmsg_types.cmsg_enabled) + xerror("got %lu bytes of cmsg data, expected 0\n", + (unsigned long)msg.msg_controllen); + + if (msg.msg_controllen == 0 && cfg_cmsg_types.cmsg_enabled) + xerror("%s\n", "got no cmsg data"); + + if (msg.msg_controllen) + process_cmsg(&msg); + + if 
(cfg_cmsg_types.tcp_inq) { + if ((size_t)ret < len && last_hint > (unsigned int)ret) { + if (ret + 1 != (int)last_hint) { + int next = read(fd, msg_buf, sizeof(msg_buf)); + + xerror("read %u of %u, last_hint was %u tcp_inq hint now %u next_read returned %d/%m\n", + ret, (unsigned int)len, last_hint, tcp_inq.last, next); + } else { + tcp_inq.expect_eof = true; + } + } + } + + return ret; +} + static ssize_t do_rnd_read(const int fd, char *buf, const size_t len) { + int ret = 0; + char tmp[16384]; size_t cap = rand(); cap &= 0xffff; @@ -307,35 +573,62 @@ static ssize_t do_rnd_read(const int fd, char *buf, const size_t len) else if (cap > len) cap = len; - return read(fd, buf, cap); + if (cfg_peek == CFG_WITH_PEEK) { + ret = recv(fd, buf, cap, MSG_PEEK); + ret = (ret < 0) ? ret : read(fd, tmp, ret); + } else if (cfg_peek == CFG_AFTER_PEEK) { + ret = recv(fd, buf, cap, MSG_PEEK); + ret = (ret < 0) ? ret : read(fd, buf, cap); + } else if (cfg_cmsg_types.cmsg_enabled) { + ret = do_recvmsg_cmsg(fd, buf, cap); + } else { + ret = read(fd, buf, cap); + } + + return ret; } -static void set_nonblock(int fd) +static void set_nonblock(int fd, bool nonblock) { int flags = fcntl(fd, F_GETFL); if (flags == -1) return; - fcntl(fd, F_SETFL, flags | O_NONBLOCK); + if (nonblock) + fcntl(fd, F_SETFL, flags | O_NONBLOCK); + else + fcntl(fd, F_SETFL, flags & ~O_NONBLOCK); +} + +static void shut_wr(int fd) +{ + /* Close our write side, ev. 
give some time + * for address notification and/or checking + * the current status + */ + if (cfg_wait) + usleep(cfg_wait); + + shutdown(fd, SHUT_WR); } -static int copyfd_io_poll(int infd, int peerfd, int outfd) +static int copyfd_io_poll(int infd, int peerfd, int outfd, + bool *in_closed_after_out, struct wstate *winfo) { struct pollfd fds = { .fd = peerfd, .events = POLLIN | POLLOUT, }; - unsigned int woff = 0, wlen = 0; - char wbuf[8192]; + unsigned int total_wlen = 0, total_rlen = 0; - set_nonblock(peerfd); + set_nonblock(peerfd, true); for (;;) { char rbuf[8192]; ssize_t len; - if (fds.events == 0) + if (fds.events == 0 || quit) break; switch (poll(&fds, 1, poll_timeout)) { @@ -352,42 +645,65 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd) } if (fds.revents & POLLIN) { - len = do_rnd_read(peerfd, rbuf, sizeof(rbuf)); + ssize_t rb = sizeof(rbuf); + + /* limit the total amount of read data to the trunc value*/ + if (cfg_truncate > 0) { + if (rb + total_rlen > cfg_truncate) + rb = cfg_truncate - total_rlen; + len = read(peerfd, rbuf, rb); + } else { + len = do_rnd_read(peerfd, rbuf, sizeof(rbuf)); + } if (len == 0) { /* no more data to receive: * peer has closed its write side */ fds.events &= ~POLLIN; - if ((fds.events & POLLOUT) == 0) + if ((fds.events & POLLOUT) == 0) { + *in_closed_after_out = true; /* and nothing more to send */ break; + } /* Else, still have data to transmit */ } else if (len < 0) { + if (cfg_rcv_trunc) + return 0; perror("read"); return 3; } + total_rlen += len; do_write(outfd, rbuf, len); } if (fds.revents & POLLOUT) { - if (wlen == 0) { - woff = 0; - wlen = read(infd, wbuf, sizeof(wbuf)); + if (winfo->len == 0) { + winfo->off = 0; + winfo->len = read(infd, winfo->buf, sizeof(winfo->buf)); } - if (wlen > 0) { + if (winfo->len > 0) { ssize_t bw; - bw = do_rnd_write(peerfd, wbuf + woff, wlen); - if (bw < 0) + /* limit the total amount of written data to the trunc value */ + if (cfg_truncate > 0 && winfo->len + total_wlen > 
cfg_truncate) + winfo->len = cfg_truncate - total_wlen; + + bw = do_rnd_write(peerfd, winfo->buf + winfo->off, winfo->len); + if (bw < 0) { + if (cfg_rcv_trunc) + return 0; + perror("write"); return 111; + } - woff += bw; - wlen -= bw; - } else if (wlen == 0) { + winfo->off += bw; + winfo->len -= bw; + total_wlen += bw; + } else if (winfo->len == 0) { /* We have no more data to send. */ fds.events &= ~POLLOUT; @@ -395,13 +711,7 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd) /* ... and peer also closed already */ break; - /* ... but we still receive. - * Close our write side, ev. give some time - * for address notification - */ - if (cfg_join) - usleep(400000); - shutdown(peerfd, SHUT_WR); + shut_wr(peerfd); } else { if (errno == EINTR) continue; @@ -411,17 +721,22 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd) } if (fds.revents & (POLLERR | POLLNVAL)) { + if (cfg_rcv_trunc) + return 0; fprintf(stderr, "Unexpected revents: " "POLLERR/POLLNVAL(%x)\n", fds.revents); return 5; } + + if (cfg_truncate > 0 && total_wlen >= cfg_truncate && + total_rlen >= cfg_truncate) + break; } /* leave some time for late join/announce */ - if (cfg_join) - usleep(400000); + if (cfg_remove && !quit) + usleep(cfg_wait); - close(peerfd); return 0; } @@ -444,10 +759,26 @@ static int do_recvfile(int infd, int outfd) return (int)r; } -static int do_mmap(int infd, int outfd, unsigned int size) +static int spool_buf(int fd, struct wstate *winfo) +{ + while (winfo->len) { + int ret = write(fd, winfo->buf + winfo->off, winfo->len); + + if (ret < 0) { + perror("write"); + return 4; + } + winfo->off += ret; + winfo->len -= ret; + } + return 0; +} + +static int do_mmap(int infd, int outfd, unsigned int size, + struct wstate *winfo) { char *inbuf = mmap(NULL, size, PROT_READ, MAP_SHARED, infd, 0); - ssize_t ret = 0, off = 0; + ssize_t ret = 0, off = winfo->total_len; size_t rem; if (inbuf == MAP_FAILED) { @@ -455,7 +786,11 @@ static int do_mmap(int infd, int outfd, 
unsigned int size) return 1; } - rem = size; + ret = spool_buf(outfd, winfo); + if (ret < 0) + return ret; + + rem = size - winfo->total_len; while (rem > 0) { ret = write(outfd, inbuf + off, rem); @@ -499,8 +834,16 @@ static int get_infd_size(int fd) return (int)count; } -static int do_sendfile(int infd, int outfd, unsigned int count) +static int do_sendfile(int infd, int outfd, unsigned int count, + struct wstate *winfo) { + int ret = spool_buf(outfd, winfo); + + if (ret < 0) + return ret; + + count -= winfo->total_len; + while (count > 0) { ssize_t r; @@ -517,7 +860,8 @@ static int do_sendfile(int infd, int outfd, unsigned int count) } static int copyfd_io_mmap(int infd, int peerfd, int outfd, - unsigned int size) + unsigned int size, bool *in_closed_after_out, + struct wstate *winfo) { int err; @@ -526,22 +870,23 @@ static int copyfd_io_mmap(int infd, int peerfd, int outfd, if (err) return err; - err = do_mmap(infd, peerfd, size); + err = do_mmap(infd, peerfd, size, winfo); } else { - err = do_mmap(infd, peerfd, size); + err = do_mmap(infd, peerfd, size, winfo); if (err) return err; - shutdown(peerfd, SHUT_WR); + shut_wr(peerfd); err = do_recvfile(peerfd, outfd); + *in_closed_after_out = true; } return err; } static int copyfd_io_sendfile(int infd, int peerfd, int outfd, - unsigned int size) + unsigned int size, bool *in_closed_after_out, struct wstate *winfo) { int err; @@ -550,40 +895,85 @@ static int copyfd_io_sendfile(int infd, int peerfd, int outfd, if (err) return err; - err = do_sendfile(infd, peerfd, size); + err = do_sendfile(infd, peerfd, size, winfo); } else { - err = do_sendfile(infd, peerfd, size); + err = do_sendfile(infd, peerfd, size, winfo); if (err) return err; + + shut_wr(peerfd); + err = do_recvfile(peerfd, outfd); + *in_closed_after_out = true; } return err; } -static int copyfd_io(int infd, int peerfd, int outfd) +static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd, struct wstate *winfo) { + bool in_closed_after_out = 
false; + struct timespec start, end; int file_size; + int ret; + + if (cfg_time && (clock_gettime(CLOCK_MONOTONIC, &start) < 0)) + xerror("can not fetch start time %d", errno); switch (cfg_mode) { case CFG_MODE_POLL: - return copyfd_io_poll(infd, peerfd, outfd); + ret = copyfd_io_poll(infd, peerfd, outfd, &in_closed_after_out, + winfo); + break; + case CFG_MODE_MMAP: file_size = get_infd_size(infd); if (file_size < 0) return file_size; - return copyfd_io_mmap(infd, peerfd, outfd, file_size); + ret = copyfd_io_mmap(infd, peerfd, outfd, file_size, + &in_closed_after_out, winfo); + break; + case CFG_MODE_SENDFILE: file_size = get_infd_size(infd); if (file_size < 0) return file_size; - return copyfd_io_sendfile(infd, peerfd, outfd, file_size); + ret = copyfd_io_sendfile(infd, peerfd, outfd, file_size, + &in_closed_after_out, winfo); + break; + + default: + fprintf(stderr, "Invalid mode %d\n", cfg_mode); + + die_usage(); + return 1; } - fprintf(stderr, "Invalid mode %d\n", cfg_mode); + if (ret) + return ret; - die_usage(); - return 1; + if (close_peerfd) + close(peerfd); + + if (cfg_time) { + unsigned int delta_ms; + + if (clock_gettime(CLOCK_MONOTONIC, &end) < 0) + xerror("can not fetch end time %d", errno); + delta_ms = (end.tv_sec - start.tv_sec) * 1000 + (end.tv_nsec - start.tv_nsec) / 1000000; + if (delta_ms > cfg_time) { + xerror("transfer slower than expected! 
runtime %d ms, expected %d ms", + delta_ms, cfg_time); + } + + /* show the runtime only if this end shutdown(wr) before receiving the EOF, + * (that is, if this end got the longer runtime) + */ + if (in_closed_after_out) + fprintf(stderr, "%d", delta_ms); + } + + return 0; } static void check_sockaddr(int pf, struct sockaddr_storage *ss, @@ -676,17 +1066,20 @@ static void maybe_close(int fd) { unsigned int r = rand(); - if (!cfg_join && (r & 1)) + if (!(cfg_join || cfg_remove || cfg_repeat > 1) && (r & 1)) close(fd); } int main_loop_s(int listensock) { struct sockaddr_storage ss; + struct wstate winfo; struct pollfd polls; socklen_t salen; int remotesock; + int fd = 0; +again: polls.fd = listensock; polls.events = POLLIN; @@ -707,47 +1100,206 @@ int main_loop_s(int listensock) check_sockaddr(pf, &ss, salen); check_getpeername(remotesock, &ss, salen); - return copyfd_io(0, remotesock, 1); + if (cfg_input) { + fd = open(cfg_input, O_RDONLY); + if (fd < 0) + xerror("can't open %s: %d", cfg_input, errno); + } + + SOCK_TEST_TCPULP(remotesock, 0); + + memset(&winfo, 0, sizeof(winfo)); + copyfd_io(fd, remotesock, 1, true, &winfo); + } else { + perror("accept"); + return 1; } - perror("accept"); + if (--cfg_repeat > 0) { + if (cfg_input) + close(fd); + goto again; + } - return 1; + return 0; } static void init_rng(void) { - int fd = open("/dev/urandom", O_RDONLY); unsigned int foo; - if (fd > 0) { - int ret = read(fd, &foo, sizeof(foo)); - - if (ret < 0) - srand(fd + foo); - close(fd); + if (getrandom(&foo, sizeof(foo), 0) == -1) { + perror("getrandom"); + exit(1); } srand(foo); } +static void xsetsockopt(int fd, int level, int optname, const void *optval, socklen_t optlen) +{ + int err; + + err = setsockopt(fd, level, optname, optval, optlen); + if (err) { + perror("setsockopt"); + exit(1); + } +} + +static void apply_cmsg_types(int fd, const struct cfg_cmsg_types *cmsg) +{ + static const unsigned int on = 1; + + if (cmsg->timestampns) + xsetsockopt(fd, SOL_SOCKET, 
SO_TIMESTAMPNS_NEW, &on, sizeof(on)); + if (cmsg->tcp_inq) + xsetsockopt(fd, IPPROTO_TCP, TCP_INQ, &on, sizeof(on)); +} + +static void parse_cmsg_types(const char *type) +{ + char *next = strchr(type, ','); + unsigned int len = 0; + + cfg_cmsg_types.cmsg_enabled = 1; + + if (next) { + parse_cmsg_types(next + 1); + len = next - type; + } else { + len = strlen(type); + } + + if (strncmp(type, "TIMESTAMPNS", len) == 0) { + cfg_cmsg_types.timestampns = 1; + return; + } + + if (strncmp(type, "TCPINQ", len) == 0) { + cfg_cmsg_types.tcp_inq = 1; + return; + } + + fprintf(stderr, "Unrecognized cmsg option %s\n", type); + exit(1); +} + +static void parse_setsock_options(const char *name) +{ + char *next = strchr(name, ','); + unsigned int len = 0; + + if (next) { + parse_setsock_options(next + 1); + len = next - name; + } else { + len = strlen(name); + } + + if (strncmp(name, "TRANSPARENT", len) == 0) { + cfg_sockopt_types.transparent = 1; + return; + } + + if (strncmp(name, "MPTFO", len) == 0) { + cfg_sockopt_types.mptfo = 1; + return; + } + + fprintf(stderr, "Unrecognized setsockopt option %s\n", name); + exit(1); +} + +void xdisconnect(int fd, int addrlen) +{ + struct sockaddr_storage empty; + int msec_sleep = 10; + int queued = 1; + int i; + + shutdown(fd, SHUT_WR); + + /* while until the pending data is completely flushed, the later + * disconnect will bypass/ignore/drop any pending data. + */ + for (i = 0; ; i += msec_sleep) { + if (ioctl(fd, SIOCOUTQ, &queued) < 0) + xerror("can't query out socket queue: %d", errno); + + if (!queued) + break; + + if (i > poll_timeout) + xerror("timeout while waiting for spool to complete"); + usleep(msec_sleep * 1000); + } + + memset(&empty, 0, sizeof(empty)); + empty.ss_family = AF_UNSPEC; + if (connect(fd, (struct sockaddr *)&empty, addrlen) < 0) + xerror("can't disconnect: %d", errno); +} + int main_loop(void) { - int fd; + int fd = 0, ret, fd_in = 0; + struct addrinfo *peer; + struct wstate winfo; - /* listener is ready. 
*/ - fd = sock_connect_mptcp(cfg_host, cfg_port, cfg_sock_proto); + if (cfg_input && cfg_sockopt_types.mptfo) { + fd_in = open(cfg_input, O_RDONLY); + if (fd < 0) + xerror("can't open %s:%d", cfg_input, errno); + } + + memset(&winfo, 0, sizeof(winfo)); + fd = sock_connect_mptcp(cfg_host, cfg_port, cfg_sock_proto, &peer, fd_in, &winfo); if (fd < 0) return 2; +again: check_getpeername_connect(fd); + SOCK_TEST_TCPULP(fd, cfg_sock_proto); + if (cfg_rcvbuf) set_rcvbuf(fd, cfg_rcvbuf); if (cfg_sndbuf) set_sndbuf(fd, cfg_sndbuf); + if (cfg_cmsg_types.cmsg_enabled) + apply_cmsg_types(fd, &cfg_cmsg_types); - return copyfd_io(0, fd, 1); + if (cfg_input && !cfg_sockopt_types.mptfo) { + fd_in = open(cfg_input, O_RDONLY); + if (fd < 0) + xerror("can't open %s:%d", cfg_input, errno); + } + + ret = copyfd_io(fd_in, fd, 1, 0, &winfo); + if (ret) + return ret; + + if (cfg_truncate > 0) { + xdisconnect(fd, peer->ai_addrlen); + } else if (--cfg_repeat > 0) { + xdisconnect(fd, peer->ai_addrlen); + + /* the socket could be unblocking at this point, we need the + * connect to be blocking + */ + set_nonblock(fd, false); + if (connect(fd, peer->ai_addr, peer->ai_addrlen)) + xerror("can't reconnect: %d", errno); + if (cfg_input) + close(fd_in); + memset(&winfo, 0, sizeof(winfo)); + goto again; + } else { + close(fd); + } + + return 0; } int parse_proto(const char *proto) @@ -785,6 +1337,26 @@ int parse_mode(const char *mode) return 0; } +int parse_peek(const char *mode) +{ + if (!strcasecmp(mode, "saveWithPeek")) + return CFG_WITH_PEEK; + if (!strcasecmp(mode, "saveAfterPeek")) + return CFG_AFTER_PEEK; + + fprintf(stderr, "Unknown: %s\n", mode); + fprintf(stderr, "Supported MSG_PEEK mode are:\n"); + fprintf(stderr, + "\t\t\"saveWithPeek\" - recv data with flags 'MSG_PEEK' and save the peek data into file\n"); + fprintf(stderr, + "\t\t\"saveAfterPeek\" - read and save data into file after recv with flags 'MSG_PEEK'\n"); + + die_usage(); + + /* silence compiler warning */ + return 0; +} + 
static int parse_int(const char *size) { unsigned long s; @@ -812,12 +1384,37 @@ static void parse_opts(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "6jlp:s:hut:m:S:R:")) != -1) { + while ((c = getopt(argc, argv, "6c:f:hi:I:jlm:M:o:p:P:r:R:s:S:t:T:w:")) != -1) { switch (c) { + case 'f': + cfg_truncate = atoi(optarg); + + /* when receiving a fastclose, ignore PIPE signals and + * all the I/O errors later in the code + */ + if (cfg_truncate < 0) { + cfg_rcv_trunc = true; + signal(SIGPIPE, handle_signal); + } + break; case 'j': cfg_join = true; cfg_mode = CFG_MODE_POLL; break; + case 'r': + cfg_remove = true; + cfg_mode = CFG_MODE_POLL; + cfg_wait = 400000; + cfg_do_w = atoi(optarg); + if (cfg_do_w <= 0) + cfg_do_w = 50; + break; + case 'i': + cfg_input = optarg; + break; + case 'I': + cfg_repeat = atoi(optarg); + break; case 'l': listen_mode = true; break; @@ -830,9 +1427,6 @@ static void parse_opts(int argc, char **argv) case 'h': die_usage(); break; - case 'u': - tcpulp_audit = true; - break; case '6': pf = AF_INET6; break; @@ -841,6 +1435,9 @@ static void parse_opts(int argc, char **argv) if (poll_timeout <= 0) poll_timeout = -1; break; + case 'T': + cfg_time = atoi(optarg); + break; case 'm': cfg_mode = parse_mode(optarg); break; @@ -850,6 +1447,21 @@ static void parse_opts(int argc, char **argv) case 'R': cfg_rcvbuf = parse_int(optarg); break; + case 'w': + cfg_wait = atoi(optarg)*1000000; + break; + case 'M': + cfg_mark = strtol(optarg, NULL, 0); + break; + case 'P': + cfg_peek = parse_peek(optarg); + break; + case 'c': + parse_cmsg_types(optarg); + break; + case 'o': + parse_setsock_options(optarg); + break; } } @@ -865,11 +1477,9 @@ int main(int argc, char *argv[]) { init_rng(); + signal(SIGUSR1, handle_signal); parse_opts(argc, argv); - if (tcpulp_audit) - return sock_test_tcpulp(cfg_host, cfg_port) ? 
0 : 1; - if (listen_mode) { int fd = sock_listen_mptcp(cfg_host, cfg_port); @@ -880,6 +1490,10 @@ int main(int argc, char *argv[]) set_rcvbuf(fd, cfg_rcvbuf); if (cfg_sndbuf) set_sndbuf(fd, cfg_sndbuf); + if (cfg_mark) + set_mark(fd, cfg_mark); + if (cfg_cmsg_types.cmsg_enabled) + apply_cmsg_types(fd, &cfg_cmsg_types); return main_loop_s(fd); } diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index acf02e156d20..b77fb7065bfb 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -1,26 +1,39 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +# Double quotes to prevent globbing and word splitting is recommended in new +# code but we accept it, especially because there were too many before having +# address all other issues detected by shellcheck. +#shellcheck disable=SC2086 + +. "$(dirname "${0}")/mptcp_lib.sh" + time_start=$(date +%s) -optstring="S:R:d:e:l:r:h4cm:" +optstring="S:R:d:e:l:r:h4cm:f:tC" ret=0 +final_ret=0 sin="" sout="" +cin_disconnect="" cin="" cout="" -ksft_skip=4 capture=false -timeout=30 +timeout_poll=30 +timeout_test=$((timeout_poll * 2 + 1)) ipv6=true ethtool_random_on=true -tc_delay="$((RANDOM%400))" +tc_delay="$((RANDOM%50))" tc_loss=$((RANDOM%101)) -tc_reorder="" testmode="" sndbuf=0 rcvbuf=0 options_log=true +do_tcp=0 +checksum=false +filesize=0 +connect_per_transfer=1 +port=$((10000 - 1)) if [ $tc_loss -eq 100 ];then tc_loss=1% @@ -40,23 +53,26 @@ usage() { echo -e "\t-e: ethtool features to disable, e.g.: \"-e tso -e gso\" (default: randomly disable any of tso/gso/gro)" echo -e "\t-4: IPv4 only: disable IPv6 tests (default: test both IPv4 and IPv6)" echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)" + echo -e "\t-f: size of file to transfer in bytes (default random)" echo -e "\t-S: set sndbuf value (default: use kernel default)" echo -e "\t-R: set rcvbuf value (default: use 
kernel default)" echo -e "\t-m: test mode (poll, sendfile; default: poll)" + echo -e "\t-t: also run tests with TCP (use twice to non-fallback tcp)" + echo -e "\t-C: enable the MPTCP data checksum" } while getopts "$optstring" option;do case "$option" in "h") usage $0 - exit 0 + exit ${KSFT_PASS} ;; "d") if [ $OPTARG -ge 0 ];then tc_delay="$OPTARG" else echo "-d requires numeric argument, got \"$OPTARG\"" 1>&2 - exit 1 + exit ${KSFT_FAIL} fi ;; "e") @@ -80,7 +96,7 @@ while getopts "$optstring" option;do sndbuf="$OPTARG" else echo "-S requires numeric argument, got \"$OPTARG\"" 1>&2 - exit 1 + exit ${KSFT_FAIL} fi ;; "R") @@ -88,57 +104,61 @@ while getopts "$optstring" option;do rcvbuf="$OPTARG" else echo "-R requires numeric argument, got \"$OPTARG\"" 1>&2 - exit 1 + exit ${KSFT_FAIL} fi ;; "m") testmode="$OPTARG" ;; + "f") + filesize="$OPTARG" + ;; + "t") + do_tcp=$((do_tcp+1)) + ;; + "C") + checksum=true + ;; "?") usage $0 - exit 1 + exit ${KSFT_FAIL} ;; esac done -sec=$(date +%s) -rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) -ns1="ns1-$rndh" -ns2="ns2-$rndh" -ns3="ns3-$rndh" -ns4="ns4-$rndh" +ns1="" +ns2="" +ns3="" +ns4="" -TEST_COUNT=0 +TEST_GROUP="" +# This function is used in the cleanup trap +#shellcheck disable=SC2317 cleanup() { + rm -f "$cin_disconnect" "$cout_disconnect" rm -f "$cin" "$cout" rm -f "$sin" "$sout" rm -f "$capout" - local netns - for netns in "$ns1" "$ns2" "$ns3" "$ns4";do - ip netns del $netns - done + mptcp_lib_ns_exit "${ns1}" "${ns2}" "${ns3}" "${ns4}" } -ip -Version > /dev/null 2>&1 -if [ $? 
-ne 0 ];then - echo "SKIP: Could not run test without ip tool" - exit $ksft_skip -fi +mptcp_lib_check_mptcp +mptcp_lib_check_kallsyms +mptcp_lib_check_tools ip tc sin=$(mktemp) sout=$(mktemp) cin=$(mktemp) cout=$(mktemp) capout=$(mktemp) +cin_disconnect="$cin".disconnect +cout_disconnect="$cout".disconnect trap cleanup EXIT -for i in "$ns1" "$ns2" "$ns3" "$ns4";do - ip netns add $i || exit $ksft_skip - ip -net $i link set lo up -done +mptcp_lib_ns_init ns1 ns2 ns3 ns4 # "$ns1" ns2 ns3 ns4 # ns1eth2 ns2eth1 ns2eth3 ns3eth2 ns3eth4 ns4eth3 @@ -186,13 +206,20 @@ ip -net "$ns4" link set ns4eth3 up ip -net "$ns4" route add default via 10.0.3.2 ip -net "$ns4" route add default via dead:beef:3::2 +if $checksum; then + for i in "$ns1" "$ns2" "$ns3" "$ns4";do + ip netns exec $i sysctl -q net.mptcp.checksum_enabled=1 + done +fi + set_ethtool_flags() { local ns="$1" local dev="$2" local flags="$3" - ip netns exec $ns ethtool -K $dev $flags 2>/dev/null - [ $? -eq 0 ] && echo "INFO: set $ns dev $dev: ethtool -K $flags" + if ip netns exec $ns ethtool -K $dev $flags 2>/dev/null; then + mptcp_lib_pr_info "set $ns dev $dev: ethtool -K $flags" + fi } set_random_ethtool_flags() { @@ -220,100 +247,62 @@ else set_ethtool_flags "$ns4" ns4eth3 "$ethtool_args" fi -print_file_err() -{ - ls -l "$1" 1>&2 - echo "Trailing bytes are: " - tail -c 27 "$1" -} - -check_transfer() -{ - local in=$1 - local out=$2 - local what=$3 - - cmp "$in" "$out" > /dev/null 2>&1 - if [ $? 
-ne 0 ] ;then - echo "[ FAIL ] $what does not match (in, out):" - print_file_err "$in" - print_file_err "$out" - - return 1 - fi - - return 0 +print_larger_title() { + # here we don't have the time, a bit longer for the alignment + MPTCP_LIB_TEST_FORMAT="%02u %-69s" \ + mptcp_lib_print_title "${@}" } check_mptcp_disabled() { local disabled_ns - disabled_ns="ns_disabled-$sech-$(mktemp -u XXXXXX)" - ip netns add ${disabled_ns} || exit $ksft_skip + mptcp_lib_ns_init disabled_ns + print_larger_title "New MPTCP socket can be blocked via sysctl" # net.mptcp.enabled should be enabled by default if [ "$(ip netns exec ${disabled_ns} sysctl net.mptcp.enabled | awk '{ print $3 }')" -ne 1 ]; then - echo -e "net.mptcp.enabled sysctl is not 1 by default\t\t[ FAIL ]" - ret=1 + mptcp_lib_pr_fail "net.mptcp.enabled sysctl is not 1 by default" + mptcp_lib_result_fail "net.mptcp.enabled sysctl is not 1 by default" + ret=${KSFT_FAIL} return 1 fi ip netns exec ${disabled_ns} sysctl -q net.mptcp.enabled=0 local err=0 - LANG=C ip netns exec ${disabled_ns} ./mptcp_connect -t $timeout -p 10000 -s MPTCP 127.0.0.1 < "$cin" 2>&1 | \ + LC_ALL=C ip netns exec ${disabled_ns} ./mptcp_connect -p 10000 -s MPTCP 127.0.0.1 < "$cin" 2>&1 | \ grep -q "^socket: Protocol not available$" && err=1 - ip netns delete ${disabled_ns} + mptcp_lib_ns_exit "${disabled_ns}" if [ ${err} -eq 0 ]; then - echo -e "New MPTCP socket cannot be blocked via sysctl\t\t[ FAIL ]" - ret=1 + mptcp_lib_pr_fail "New MPTCP socket cannot be blocked via sysctl" + mptcp_lib_result_fail "New MPTCP socket cannot be blocked via sysctl" + ret=${KSFT_FAIL} return 1 fi - echo -e "New MPTCP socket can be blocked via sysctl\t\t[ OK ]" + mptcp_lib_pr_ok + mptcp_lib_result_pass "New MPTCP socket can be blocked via sysctl" return 0 } -check_mptcp_ulp_setsockopt() -{ - local t retval - t="ns_ulp-$sech-$(mktemp -u XXXXXX)" - - ip netns add ${t} || exit $ksft_skip - if ! 
ip netns exec ${t} ./mptcp_connect -u -p 10000 -s TCP 127.0.0.1 2>&1; then - printf "setsockopt(..., TCP_ULP, \"mptcp\", ...) allowed\t[ FAIL ]\n" - retval=1 - ret=$retval - else - printf "setsockopt(..., TCP_ULP, \"mptcp\", ...) blocked\t[ OK ]\n" - retval=0 - fi - ip netns del ${t} - return $retval -} - -# $1: IP address -is_v6() -{ - [ -z "${1##*:*}" ] -} - do_ping() { local listener_ns="$1" local connector_ns="$2" local connect_addr="$3" local ping_args="-q -c 1" + local rc=0 - if is_v6 "${connect_addr}"; then + if mptcp_lib_is_v6 "${connect_addr}"; then $ipv6 || return 0 ping_args="${ping_args} -6" fi - ip netns exec ${connector_ns} ping ${ping_args} $connect_addr >/dev/null - if [ $? -ne 0 ] ; then - echo "$listener_ns -> $connect_addr connectivity [ FAIL ]" 1>&2 - ret=1 + ip netns exec ${connector_ns} ping ${ping_args} $connect_addr >/dev/null || rc=1 + + if [ $rc -ne 0 ] ; then + mptcp_lib_pr_fail "$listener_ns -> $connect_addr connectivity" + ret=${KSFT_FAIL} return 1 fi @@ -321,23 +310,6 @@ do_ping() return 0 } -# $1: ns, $2: port -wait_local_port_listen() -{ - local listener_ns="${1}" - local port="${2}" - - local port_hex i - - port_hex="$(printf "%04X" "${port}")" - for i in $(seq 10); do - ip netns exec "${listener_ns}" cat /proc/net/tcp* | \ - awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" && - break - sleep 0.1 - done -} - do_transfer() { local listener_ns="$1" @@ -346,28 +318,26 @@ do_transfer() local srv_proto="$4" local connect_addr="$5" local local_addr="$6" - local extra_args="" + local extra_args="$7" - local port - port=$((10000+$TEST_COUNT)) - TEST_COUNT=$((TEST_COUNT+1)) + port=$((port + 1)) if [ "$rcvbuf" -gt 0 ]; then - extra_args="$extra_args -R $rcvbuf" + extra_args+=" -R $rcvbuf" fi if [ "$sndbuf" -gt 0 ]; then - extra_args="$extra_args -S $sndbuf" + extra_args+=" -S $sndbuf" fi if [ -n "$testmode" ]; then - extra_args="$extra_args -m $testmode" + extra_args+=" -m $testmode" fi if [ -n 
"$extra_args" ] && $options_log; then - options_log=false - echo "INFO: extra options: $extra_args" + mptcp_lib_pr_info "extra options: $extra_args" fi + options_log=false :> "$cout" :> "$sout" @@ -375,32 +345,67 @@ do_transfer() local addr_port addr_port=$(printf "%s:%d" ${connect_addr} ${port}) - printf "%.3s %-5s -> %.3s (%-20s) %-5s\t" ${connector_ns} ${cl_proto} ${listener_ns} ${addr_port} ${srv_proto} + local result_msg + result_msg="$(printf "%.3s %-5s -> %.3s (%-20s) %-5s" ${connector_ns} ${cl_proto} ${listener_ns} ${addr_port} ${srv_proto})" + mptcp_lib_print_title "${result_msg}" if $capture; then local capuser + local rndh="${connector_ns:4}" if [ -z $SUDO_USER ] ; then capuser="" else capuser="-Z $SUDO_USER" fi - local capfile="${listener_ns}-${connector_ns}-${cl_proto}-${srv_proto}-${connect_addr}.pcap" + local capfile="${rndh}-${connector_ns:0:3}-${listener_ns:0:3}-${cl_proto}-${srv_proto}-${connect_addr}-${port}" + local capopt="-i any -s 65535 -B 32768 ${capuser}" - ip netns exec ${listener_ns} tcpdump -i any -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 & - local cappid=$! + ip netns exec ${listener_ns} tcpdump ${capopt} -w "${capfile}-listener.pcap" >> "${capout}" 2>&1 & + local cappid_listener=$! + + ip netns exec ${connector_ns} tcpdump ${capopt} -w "${capfile}-connector.pcap" >> "${capout}" 2>&1 & + local cappid_connector=$! 
sleep 1 fi - ip netns exec ${listener_ns} ./mptcp_connect -t $timeout -l -p $port -s ${srv_proto} $extra_args $local_addr < "$sin" > "$sout" & + NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ + nstat -n + if [ ${listener_ns} != ${connector_ns} ]; then + NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ + nstat -n + fi + + local stat_synrx_last_l + local stat_ackrx_last_l + local stat_cookietx_last + local stat_cookierx_last + local stat_csum_err_s + local stat_csum_err_c + local stat_tcpfb_last_l + stat_synrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") + stat_ackrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") + stat_cookietx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent") + stat_cookierx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv") + stat_csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr") + stat_csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr") + stat_tcpfb_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK") + + timeout ${timeout_test} \ + ip netns exec ${listener_ns} \ + ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ + $extra_args $local_addr < "$sin" > "$sout" & local spid=$! - wait_local_port_listen "${listener_ns}" "${port}" + mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}" local start start=$(date +%s%3N) - ip netns exec ${connector_ns} ./mptcp_connect -t $timeout -p $port -s ${cl_proto} $extra_args $connect_addr < "$cin" > "$cout" & + timeout ${timeout_test} \ + ip netns exec ${connector_ns} \ + ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ + $extra_args $connect_addr < "$cin" > "$cout" & local cpid=$! 
wait $cpid @@ -413,56 +418,166 @@ do_transfer() if $capture; then sleep 1 - kill $cappid + kill ${cappid_listener} + kill ${cappid_connector} + fi + + NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ + nstat | grep Tcp > /tmp/${listener_ns}.out + if [ ${listener_ns} != ${connector_ns} ]; then + NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ + nstat | grep Tcp > /tmp/${connector_ns}.out fi local duration duration=$((stop-start)) - duration=$(printf "(duration %05sms)" $duration) + result_msg+=" # time=${duration}ms" + printf "(duration %05sms) " "${duration}" if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then - echo "$duration [ FAIL ] client exit code $retc, server $rets" 1>&2 - echo "\nnetns ${listener_ns} socket stat for $port:" 1>&2 - ip netns exec ${listener_ns} ss -nita 1>&2 -o "sport = :$port" - echo "\nnetns ${connector_ns} socket stat for $port:" 1>&2 - ip netns exec ${connector_ns} ss -nita 1>&2 -o "dport = :$port" - + mptcp_lib_pr_fail "client exit code $retc, server $rets" + echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2 + ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port" + cat /tmp/${listener_ns}.out + echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2 + ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port" + [ ${listener_ns} != ${connector_ns} ] && cat /tmp/${connector_ns}.out + + echo cat "$capout" + mptcp_lib_result_fail "${TEST_GROUP}: ${result_msg}" return 1 fi - check_transfer $sin $cout "file received by client" + mptcp_lib_check_transfer $sin $cout "file received by client" retc=$? - check_transfer $cin $sout "file received by server" + mptcp_lib_check_transfer $cin $sout "file received by server" rets=$? 
- if [ $retc -eq 0 ] && [ $rets -eq 0 ];then - echo "$duration [ OK ]" - cat "$capout" - return 0 + local extra="" + local stat_synrx_now_l + local stat_ackrx_now_l + local stat_cookietx_now + local stat_cookierx_now + local stat_ooo_now + local stat_tcpfb_now_l + stat_synrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") + stat_ackrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") + stat_cookietx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent") + stat_cookierx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv") + stat_ooo_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue") + stat_tcpfb_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK") + + expect_synrx=$((stat_synrx_last_l)) + expect_ackrx=$((stat_ackrx_last_l)) + + cookies=$(ip netns exec ${listener_ns} sysctl net.ipv4.tcp_syncookies) + cookies=${cookies##*=} + + if [ ${cl_proto} = "MPTCP" ] && [ ${srv_proto} = "MPTCP" ]; then + expect_synrx=$((stat_synrx_last_l+connect_per_transfer)) + expect_ackrx=$((stat_ackrx_last_l+connect_per_transfer)) + fi + + if [ ${stat_synrx_now_l} -lt ${expect_synrx} ]; then + mptcp_lib_pr_fail "lower MPC SYN rx (${stat_synrx_now_l})" \ + "than expected (${expect_synrx})" + retc=1 + fi + if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ] && [ ${stat_ooo_now} -eq 0 ]; then + if [ ${stat_ooo_now} -eq 0 ]; then + mptcp_lib_pr_fail "lower MPC ACK rx (${stat_ackrx_now_l})" \ + "than expected (${expect_ackrx})" + rets=1 + else + extra+=" [ Note ] fallback due to TCP OoO" + fi + fi + + if $checksum; then + local csum_err_s + local csum_err_c + csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr") + csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr") + + local csum_err_s_nr=$((csum_err_s - stat_csum_err_s)) + if [ $csum_err_s_nr -gt 0 ]; then + mptcp_lib_pr_fail "server got ${csum_err_s_nr} data checksum error[s]" + rets=1 + 
fi + + local csum_err_c_nr=$((csum_err_c - stat_csum_err_c)) + if [ $csum_err_c_nr -gt 0 ]; then + mptcp_lib_pr_fail "client got ${csum_err_c_nr} data checksum error[s]" + retc=1 + fi + fi + + if [ ${stat_ooo_now} -eq 0 ] && [ ${stat_tcpfb_last_l} -ne ${stat_tcpfb_now_l} ]; then + mptcp_lib_pr_fail "unexpected fallback to TCP" + rets=1 + fi + + if [ $cookies -eq 2 ];then + if [ $stat_cookietx_last -ge $stat_cookietx_now ] ;then + extra+=" WARN: CookieSent: did not advance" + fi + if [ $stat_cookierx_last -ge $stat_cookierx_now ] ;then + extra+=" WARN: CookieRecv: did not advance" + fi + else + if [ $stat_cookietx_last -ne $stat_cookietx_now ] ;then + extra+=" WARN: CookieSent: changed" + fi + if [ $stat_cookierx_last -ne $stat_cookierx_now ] ;then + extra+=" WARN: CookieRecv: changed" + fi + fi + + if [ ${stat_synrx_now_l} -gt ${expect_synrx} ]; then + extra+=" WARN: SYNRX: expect ${expect_synrx}," + extra+=" got ${stat_synrx_now_l} (probably retransmissions)" + fi + if [ ${stat_ackrx_now_l} -gt ${expect_ackrx} ]; then + extra+=" WARN: ACKRX: expect ${expect_ackrx}," + extra+=" got ${stat_ackrx_now_l} (probably retransmissions)" + fi + + if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then + mptcp_lib_pr_ok "${extra:1}" + mptcp_lib_result_pass "${TEST_GROUP}: ${result_msg}" + else + if [ -n "${extra}" ]; then + mptcp_lib_print_warn "${extra:1}" + fi + mptcp_lib_result_fail "${TEST_GROUP}: ${result_msg}" fi cat "$capout" - return 1 + [ $retc -eq 0 ] && [ $rets -eq 0 ] } make_file() { local name=$1 local who=$2 + local SIZE=$filesize + local ksize + local rem - local SIZE TSIZE - SIZE=$((RANDOM % (1024 * 8))) - TSIZE=$((SIZE * 1024)) + if [ $SIZE -eq 0 ]; then + local MAXSIZE=$((1024 * 1024 * 8)) + local MINSIZE=$((1024 * 256)) - dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null + SIZE=$(((RANDOM * RANDOM + MINSIZE) % MAXSIZE)) + fi + + ksize=$((SIZE / 1024)) + rem=$((SIZE - (ksize * 1024))) - SIZE=$((RANDOM % 1024)) - SIZE=$((SIZE + 128)) - TSIZE=$((TSIZE + 
SIZE)) - dd if=/dev/urandom conv=notrunc of="$name" bs=1 count=$SIZE 2> /dev/null - echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name" + mptcp_lib_make_file $name 1024 $ksize + dd if=/dev/urandom conv=notrunc of="$name" oflag=append bs=1 count=$rem 2> /dev/null - echo "Created $name (size $TSIZE) containing data sent by $who" + echo "Created $name (size $(du -b "$name")) containing data sent by $who" } run_tests_lo() @@ -471,6 +586,7 @@ run_tests_lo() local connector_ns="$2" local connect_addr="$3" local loopback="$4" + local extra_args="$5" local lret=0 # skip if test programs are running inside same netns for subsequent runs. @@ -479,43 +595,58 @@ run_tests_lo() fi # skip if we don't want v6 - if ! $ipv6 && is_v6 "${connect_addr}"; then + if ! $ipv6 && mptcp_lib_is_v6 "${connect_addr}"; then return 0 fi local local_addr - if is_v6 "${connect_addr}"; then + if mptcp_lib_is_v6 "${connect_addr}"; then local_addr="::" else local_addr="0.0.0.0" fi - do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} ${local_addr} + do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP \ + ${connect_addr} ${local_addr} "${extra_args}" lret=$? if [ $lret -ne 0 ]; then ret=$lret return 1 fi - # don't bother testing fallback tcp except for loopback case. - if [ ${listener_ns} != ${connector_ns} ]; then - return 0 + if [ $do_tcp -eq 0 ]; then + # don't bother testing fallback tcp except for loopback case. + if [ ${listener_ns} != ${connector_ns} ]; then + return 0 + fi fi - do_transfer ${listener_ns} ${connector_ns} MPTCP TCP ${connect_addr} ${local_addr} + do_transfer ${listener_ns} ${connector_ns} MPTCP TCP \ + ${connect_addr} ${local_addr} "${extra_args}" lret=$? if [ $lret -ne 0 ]; then ret=$lret return 1 fi - do_transfer ${listener_ns} ${connector_ns} TCP MPTCP ${connect_addr} ${local_addr} + do_transfer ${listener_ns} ${connector_ns} TCP MPTCP \ + ${connect_addr} ${local_addr} "${extra_args}" lret=$? 
if [ $lret -ne 0 ]; then ret=$lret return 1 fi + if [ $do_tcp -gt 1 ] ;then + do_transfer ${listener_ns} ${connector_ns} TCP TCP \ + ${connect_addr} ${local_addr} "${extra_args}" + lret=$? + if [ $lret -ne 0 ]; then + ret=$lret + return 1 + fi + fi + return 0 } @@ -524,14 +655,204 @@ run_tests() run_tests_lo $1 $2 $3 0 } +run_test_transparent() +{ + local connect_addr="$1" + local msg="$2" + + local connector_ns="$ns1" + local listener_ns="$ns2" + local lret=0 + local r6flag="" + + TEST_GROUP="${msg}" + + # skip if we don't want v6 + if ! $ipv6 && mptcp_lib_is_v6 "${connect_addr}"; then + return 0 + fi + + # IP(V6)_TRANSPARENT has been added after TOS support which came with + # the required infrastructure in MPTCP sockopt code. To support TOS, the + # following function has been exported (T). Not great but better than + # checking for a specific kernel version. + if ! mptcp_lib_kallsyms_has "T __ip_sock_set_tos$"; then + mptcp_lib_pr_skip "${msg} not supported by the kernel" + mptcp_lib_result_skip "${TEST_GROUP}" + return + fi + + if ! ip netns exec "$listener_ns" nft -f /dev/stdin <<"EOF" +flush ruleset +table inet mangle { + chain divert { + type filter hook prerouting priority -150; + + meta l4proto tcp socket transparent 1 meta mark set 1 accept + tcp dport 20000 tproxy to :20000 meta mark set 1 accept + } +} +EOF + then + mptcp_lib_pr_skip "$msg, could not load nft ruleset" + mptcp_lib_fail_if_expected_feature "nft rules" + mptcp_lib_result_skip "${TEST_GROUP}" + return + fi + + local local_addr + if mptcp_lib_is_v6 "${connect_addr}"; then + local_addr="::" + r6flag="-6" + else + local_addr="0.0.0.0" + fi + + if ! ip -net "$listener_ns" $r6flag rule add fwmark 1 lookup 100; then + ip netns exec "$listener_ns" nft flush ruleset + mptcp_lib_pr_skip "$msg, ip $r6flag rule failed" + mptcp_lib_fail_if_expected_feature "ip rule" + mptcp_lib_result_skip "${TEST_GROUP}" + return + fi + + if ! 
ip -net "$listener_ns" route add local $local_addr/0 dev lo table 100; then + ip netns exec "$listener_ns" nft flush ruleset + ip -net "$listener_ns" $r6flag rule del fwmark 1 lookup 100 + mptcp_lib_pr_skip "$msg, ip route add local $local_addr failed" + mptcp_lib_fail_if_expected_feature "ip route" + mptcp_lib_result_skip "${TEST_GROUP}" + return + fi + + mptcp_lib_pr_info "test $msg" + + port=$((20000 - 1)) + local extra_args="-o TRANSPARENT" + do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP \ + ${connect_addr} ${local_addr} "${extra_args}" + lret=$? + + ip netns exec "$listener_ns" nft flush ruleset + ip -net "$listener_ns" $r6flag rule del fwmark 1 lookup 100 + ip -net "$listener_ns" route del local $local_addr/0 dev lo table 100 + + if [ $lret -ne 0 ]; then + mptcp_lib_pr_fail "$msg, mptcp connection error" + ret=$lret + return 1 + fi + + mptcp_lib_pr_info "$msg pass" + return 0 +} + +run_tests_peekmode() +{ + local peekmode="$1" + + TEST_GROUP="peek mode: ${peekmode}" + mptcp_lib_pr_info "with peek mode: ${peekmode}" + run_tests_lo "$ns1" "$ns1" 10.0.1.1 1 "-P ${peekmode}" + run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-P ${peekmode}" +} + +run_tests_mptfo() +{ + TEST_GROUP="MPTFO" + + if ! 
mptcp_lib_kallsyms_has "mptcp_fastopen_"; then + mptcp_lib_pr_skip "TFO not supported by the kernel" + mptcp_lib_result_skip "${TEST_GROUP}" + return + fi + + mptcp_lib_pr_info "with MPTFO start" + ip netns exec "$ns1" sysctl -q net.ipv4.tcp_fastopen=2 + ip netns exec "$ns2" sysctl -q net.ipv4.tcp_fastopen=1 + + run_tests_lo "$ns1" "$ns2" 10.0.1.1 0 "-o MPTFO" + run_tests_lo "$ns1" "$ns2" 10.0.1.1 0 "-o MPTFO" + + run_tests_lo "$ns1" "$ns2" dead:beef:1::1 0 "-o MPTFO" + run_tests_lo "$ns1" "$ns2" dead:beef:1::1 0 "-o MPTFO" + + ip netns exec "$ns1" sysctl -q net.ipv4.tcp_fastopen=0 + ip netns exec "$ns2" sysctl -q net.ipv4.tcp_fastopen=0 + mptcp_lib_pr_info "with MPTFO end" +} + +run_tests_disconnect() +{ + local old_cin=$cin + local old_sin=$sin + + TEST_GROUP="full disconnect" + + if ! mptcp_lib_kallsyms_has "mptcp_pm_data_reset$"; then + mptcp_lib_pr_skip "Full disconnect not supported" + mptcp_lib_result_skip "${TEST_GROUP}" + return + fi + + cat $cin $cin $cin > "$cin".disconnect + + # force do_transfer to cope with the multiple transmissions + sin="$cin.disconnect" + cin="$cin.disconnect" + cin_disconnect="$old_cin" + connect_per_transfer=3 + + mptcp_lib_pr_info "disconnect" + run_tests_lo "$ns1" "$ns1" 10.0.1.1 1 "-I 3 -i $old_cin" + run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-I 3 -i $old_cin" + + # restore previous status + sin=$old_sin + cin=$old_cin + cin_disconnect="$cin".disconnect + connect_per_transfer=1 +} + +display_time() +{ + time_end=$(date +%s) + time_run=$((time_end-time_start)) + + echo "Time: ${time_run} seconds" +} + +log_if_error() +{ + local msg="$1" + + if [ ${ret} -ne 0 ]; then + mptcp_lib_pr_fail "${msg}" + + final_ret=${ret} + ret=${KSFT_PASS} + + return ${final_ret} + fi +} + +stop_if_error() +{ + if ! 
log_if_error "${@}"; then + display_time + mptcp_lib_result_print_all_tap + exit ${final_ret} + fi +} + make_file "$cin" "client" make_file "$sin" "server" check_mptcp_disabled -check_mptcp_ulp_setsockopt +stop_if_error "The kernel configuration is not valid for MPTCP" -echo "INFO: validating network environment with pings" +print_larger_title "Validating network environment with pings" for sender in "$ns1" "$ns2" "$ns3" "$ns4";do do_ping "$ns1" $sender 10.0.1.1 do_ping "$ns1" $sender dead:beef:1::1 @@ -550,43 +871,59 @@ for sender in "$ns1" "$ns2" "$ns3" "$ns4";do do_ping "$ns4" $sender dead:beef:3::1 done -[ -n "$tc_loss" ] && tc -net "$ns2" qdisc add dev ns2eth3 root netem loss random $tc_loss -echo -n "INFO: Using loss of $tc_loss " -test "$tc_delay" -gt 0 && echo -n "delay $tc_delay ms " +mptcp_lib_result_code "${ret}" "ping tests" + +stop_if_error "Could not even run ping tests" +mptcp_lib_pr_ok + +[ -n "$tc_loss" ] && tc -net "$ns2" qdisc add dev ns2eth3 root netem loss random $tc_loss delay ${tc_delay}ms +tc_info="loss of $tc_loss " +test "$tc_delay" -gt 0 && tc_info+="delay $tc_delay ms " + +reorder_delay=$((tc_delay / 4)) if [ -z "${tc_reorder}" ]; then reorder1=$((RANDOM%10)) reorder1=$((100 - reorder1)) reorder2=$((RANDOM%100)) - if [ $tc_delay -gt 0 ] && [ $reorder1 -lt 100 ] && [ $reorder2 -gt 0 ]; then + if [ $reorder_delay -gt 0 ] && [ $reorder1 -lt 100 ] && [ $reorder2 -gt 0 ]; then tc_reorder="reorder ${reorder1}% ${reorder2}%" - echo -n "$tc_reorder " + tc_info+="$tc_reorder with delay ${reorder_delay}ms " fi elif [ "$tc_reorder" = "0" ];then tc_reorder="" -elif [ "$tc_delay" -gt 0 ];then +elif [ "$reorder_delay" -gt 0 ];then # reordering requires some delay tc_reorder="reorder $tc_reorder" - echo -n "$tc_reorder " + tc_info+="$tc_reorder with delay ${reorder_delay}ms " fi -echo "on ns3eth4" +mptcp_lib_pr_info "Using ${tc_info}on ns3eth4" + +tc -net "$ns3" qdisc add dev ns3eth4 root netem delay ${reorder_delay}ms $tc_reorder -tc -net "$ns3" qdisc 
add dev ns3eth4 root netem delay ${tc_delay}ms $tc_reorder +TEST_GROUP="loopback v4" +run_tests_lo "$ns1" "$ns1" 10.0.1.1 1 +stop_if_error "Could not even run loopback test" +TEST_GROUP="loopback v6" +run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 +stop_if_error "Could not even run loopback v6 test" + +TEST_GROUP="multihosts" for sender in $ns1 $ns2 $ns3 $ns4;do - run_tests_lo "$ns1" "$sender" 10.0.1.1 1 - if [ $ret -ne 0 ] ;then - echo "FAIL: Could not even run loopback test" 1>&2 - exit $ret - fi - run_tests_lo "$ns1" $sender dead:beef:1::1 1 - if [ $ret -ne 0 ] ;then - echo "FAIL: Could not even run loopback v6 test" 2>&1 - exit $ret + # ns1<->ns2 is not subject to reordering/tc delays. Use it to test + # mptcp syncookie support. + if [ $sender = $ns1 ]; then + ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2 + else + ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=1 fi + run_tests "$ns1" $sender 10.0.1.1 + run_tests "$ns1" $sender dead:beef:1::1 + run_tests "$ns2" $sender 10.0.1.2 run_tests "$ns2" $sender dead:beef:1::2 run_tests "$ns2" $sender 10.0.2.1 @@ -599,11 +936,26 @@ for sender in $ns1 $ns2 $ns3 $ns4;do run_tests "$ns4" $sender 10.0.3.1 run_tests "$ns4" $sender dead:beef:3::1 + + log_if_error "Tests with $sender as a sender have failed" done -time_end=$(date +%s) -time_run=$((time_end-time_start)) +run_tests_peekmode "saveWithPeek" +run_tests_peekmode "saveAfterPeek" +log_if_error "Tests with peek mode have failed" + +# MPTFO (MultiPath TCP Fatopen tests) +run_tests_mptfo +log_if_error "Tests with MPTFO have failed" + +# connect to ns4 ip address, ns2 should intercept/proxy +run_test_transparent 10.0.3.1 "tproxy ipv4" +run_test_transparent dead:beef:3::1 "tproxy ipv6" +log_if_error "Tests with tproxy have failed" -echo "Time: ${time_run} seconds" +run_tests_disconnect +log_if_error "Tests of the full disconnection have failed" -exit $ret +display_time +mptcp_lib_result_print_all_tap +exit ${final_ret} diff --git 
a/tools/testing/selftests/net/mptcp/mptcp_inq.c b/tools/testing/selftests/net/mptcp/mptcp_inq.c new file mode 100644 index 000000000000..218aac467321 --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptcp_inq.c @@ -0,0 +1,599 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE + +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <string.h> +#include <stdarg.h> +#include <stdbool.h> +#include <stdint.h> +#include <inttypes.h> +#include <stdio.h> +#include <stdlib.h> +#include <strings.h> +#include <unistd.h> +#include <time.h> + +#include <sys/ioctl.h> +#include <sys/random.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/wait.h> + +#include <netdb.h> +#include <netinet/in.h> + +#include <linux/tcp.h> +#include <linux/sockios.h> + +#ifndef IPPROTO_MPTCP +#define IPPROTO_MPTCP 262 +#endif +#ifndef SOL_MPTCP +#define SOL_MPTCP 284 +#endif + +static int pf = AF_INET; +static int proto_tx = IPPROTO_MPTCP; +static int proto_rx = IPPROTO_MPTCP; + +static void die_perror(const char *msg) +{ + perror(msg); + exit(1); +} + +static void die_usage(int r) +{ + fprintf(stderr, "Usage: mptcp_inq [-6] [ -t tcp|mptcp ] [ -r tcp|mptcp]\n"); + exit(r); +} + +static void xerror(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + fputc('\n', stderr); + exit(1); +} + +static const char *getxinfo_strerr(int err) +{ + if (err == EAI_SYSTEM) + return strerror(errno); + + return gai_strerror(err); +} + +static void xgetaddrinfo(const char *node, const char *service, + const struct addrinfo *hints, + struct addrinfo **res) +{ + int err = getaddrinfo(node, service, hints, res); + + if (err) { + const char *errstr = getxinfo_strerr(err); + + fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n", + node ? node : "", service ? 
service : "", errstr); + exit(1); + } +} + +static int sock_listen_mptcp(const char * const listenaddr, + const char * const port) +{ + int sock = -1; + struct addrinfo hints = { + .ai_protocol = IPPROTO_TCP, + .ai_socktype = SOCK_STREAM, + .ai_flags = AI_PASSIVE | AI_NUMERICHOST + }; + + hints.ai_family = pf; + + struct addrinfo *a, *addr; + int one = 1; + + xgetaddrinfo(listenaddr, port, &hints, &addr); + hints.ai_family = pf; + + for (a = addr; a; a = a->ai_next) { + sock = socket(a->ai_family, a->ai_socktype, proto_rx); + if (sock < 0) + continue; + + if (-1 == setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &one, + sizeof(one))) + perror("setsockopt"); + + if (bind(sock, a->ai_addr, a->ai_addrlen) == 0) + break; /* success */ + + perror("bind"); + close(sock); + sock = -1; + } + + freeaddrinfo(addr); + + if (sock < 0) + xerror("could not create listen socket"); + + if (listen(sock, 20)) + die_perror("listen"); + + return sock; +} + +static int sock_connect_mptcp(const char * const remoteaddr, + const char * const port, int proto) +{ + struct addrinfo hints = { + .ai_protocol = IPPROTO_TCP, + .ai_socktype = SOCK_STREAM, + }; + struct addrinfo *a, *addr; + int sock = -1; + + hints.ai_family = pf; + + xgetaddrinfo(remoteaddr, port, &hints, &addr); + for (a = addr; a; a = a->ai_next) { + sock = socket(a->ai_family, a->ai_socktype, proto); + if (sock < 0) + continue; + + if (connect(sock, a->ai_addr, a->ai_addrlen) == 0) + break; /* success */ + + die_perror("connect"); + } + + if (sock < 0) + xerror("could not create connect socket"); + + freeaddrinfo(addr); + return sock; +} + +static int protostr_to_num(const char *s) +{ + if (strcasecmp(s, "tcp") == 0) + return IPPROTO_TCP; + if (strcasecmp(s, "mptcp") == 0) + return IPPROTO_MPTCP; + + die_usage(1); + return 0; +} + +static void parse_opts(int argc, char **argv) +{ + int c; + + while ((c = getopt(argc, argv, "h6t:r:")) != -1) { + switch (c) { + case 'h': + die_usage(0); + break; + case '6': + pf = AF_INET6; + break; 
+ case 't': + proto_tx = protostr_to_num(optarg); + break; + case 'r': + proto_rx = protostr_to_num(optarg); + break; + default: + die_usage(1); + break; + } + } +} + +/* wait up to timeout milliseconds */ +static void wait_for_ack(int fd, int timeout, size_t total) +{ + int i; + + for (i = 0; i < timeout; i++) { + int nsd, ret, queued = -1; + struct timespec req; + + ret = ioctl(fd, TIOCOUTQ, &queued); + if (ret < 0) + die_perror("TIOCOUTQ"); + + ret = ioctl(fd, SIOCOUTQNSD, &nsd); + if (ret < 0) + die_perror("SIOCOUTQNSD"); + + if ((size_t)queued > total) + xerror("TIOCOUTQ %u, but only %zu expected\n", queued, total); + assert(nsd <= queued); + + if (queued == 0) + return; + + /* wait for peer to ack rx of all data */ + req.tv_sec = 0; + req.tv_nsec = 1 * 1000 * 1000ul; /* 1ms */ + nanosleep(&req, NULL); + } + + xerror("still tx data queued after %u ms\n", timeout); +} + +static void connect_one_server(int fd, int unixfd) +{ + size_t len, i, total, sent; + char buf[4096], buf2[4096]; + ssize_t ret; + + len = rand() % (sizeof(buf) - 1); + + if (len < 128) + len = 128; + + for (i = 0; i < len ; i++) { + buf[i] = rand() % 26; + buf[i] += 'A'; + } + + buf[i] = '\n'; + + /* un-block server */ + ret = read(unixfd, buf2, 4); + assert(ret == 4); + + assert(strncmp(buf2, "xmit", 4) == 0); + + ret = write(unixfd, &len, sizeof(len)); + assert(ret == (ssize_t)sizeof(len)); + + ret = write(fd, buf, len); + if (ret < 0) + die_perror("write"); + + if (ret != (ssize_t)len) + xerror("short write"); + + ret = read(unixfd, buf2, 4); + assert(strncmp(buf2, "huge", 4) == 0); + + total = rand() % (16 * 1024 * 1024); + total += (1 * 1024 * 1024); + sent = total; + + ret = write(unixfd, &total, sizeof(total)); + assert(ret == (ssize_t)sizeof(total)); + + wait_for_ack(fd, 5000, len); + + while (total > 0) { + if (total > sizeof(buf)) + len = sizeof(buf); + else + len = total; + + ret = write(fd, buf, len); + if (ret < 0) + die_perror("write"); + total -= ret; + + /* we don't have to 
care about buf content, only + * number of total bytes sent + */ + } + + ret = read(unixfd, buf2, 4); + assert(ret == 4); + assert(strncmp(buf2, "shut", 4) == 0); + + wait_for_ack(fd, 5000, sent); + + ret = write(fd, buf, 1); + assert(ret == 1); + close(fd); + ret = write(unixfd, "closed", 6); + assert(ret == 6); + + close(unixfd); +} + +static void get_tcp_inq(struct msghdr *msgh, unsigned int *inqv) +{ + struct cmsghdr *cmsg; + + for (cmsg = CMSG_FIRSTHDR(msgh); cmsg ; cmsg = CMSG_NXTHDR(msgh, cmsg)) { + if (cmsg->cmsg_level == IPPROTO_TCP && cmsg->cmsg_type == TCP_CM_INQ) { + memcpy(inqv, CMSG_DATA(cmsg), sizeof(*inqv)); + return; + } + } + + xerror("could not find TCP_CM_INQ cmsg type"); +} + +static void process_one_client(int fd, int unixfd) +{ + unsigned int tcp_inq; + size_t expect_len; + char msg_buf[4096]; + char buf[4096]; + char tmp[16]; + struct iovec iov = { + .iov_base = buf, + .iov_len = 1, + }; + struct msghdr msg = { + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_control = msg_buf, + .msg_controllen = sizeof(msg_buf), + }; + ssize_t ret, tot; + + ret = write(unixfd, "xmit", 4); + assert(ret == 4); + + ret = read(unixfd, &expect_len, sizeof(expect_len)); + assert(ret == (ssize_t)sizeof(expect_len)); + + if (expect_len > sizeof(buf)) + xerror("expect len %zu exceeds buffer size", expect_len); + + for (;;) { + struct timespec req; + unsigned int queued; + + ret = ioctl(fd, FIONREAD, &queued); + if (ret < 0) + die_perror("FIONREAD"); + if (queued > expect_len) + xerror("FIONREAD returned %u, but only %zu expected\n", + queued, expect_len); + if (queued == expect_len) + break; + + req.tv_sec = 0; + req.tv_nsec = 1000 * 1000ul; + nanosleep(&req, NULL); + } + + /* read one byte, expect cmsg to return expected - 1 */ + ret = recvmsg(fd, &msg, 0); + if (ret < 0) + die_perror("recvmsg"); + + if (msg.msg_controllen == 0) + xerror("msg_controllen is 0"); + + get_tcp_inq(&msg, &tcp_inq); + + assert((size_t)tcp_inq == (expect_len - 1)); + + iov.iov_len = 
sizeof(buf); + ret = recvmsg(fd, &msg, 0); + if (ret < 0) + die_perror("recvmsg"); + + /* should have gotten exact remainder of all pending data */ + assert(ret == (ssize_t)tcp_inq); + + /* should be 0, all drained */ + get_tcp_inq(&msg, &tcp_inq); + assert(tcp_inq == 0); + + /* request a large swath of data. */ + ret = write(unixfd, "huge", 4); + assert(ret == 4); + + ret = read(unixfd, &expect_len, sizeof(expect_len)); + assert(ret == (ssize_t)sizeof(expect_len)); + + /* peer should send us a few mb of data */ + if (expect_len <= sizeof(buf)) + xerror("expect len %zu too small\n", expect_len); + + tot = 0; + do { + iov.iov_len = sizeof(buf); + ret = recvmsg(fd, &msg, 0); + if (ret < 0) + die_perror("recvmsg"); + + tot += ret; + + get_tcp_inq(&msg, &tcp_inq); + + if (tcp_inq > expect_len - tot) + xerror("inq %d, remaining %d total_len %d\n", + tcp_inq, expect_len - tot, (int)expect_len); + + assert(tcp_inq <= expect_len - tot); + } while ((size_t)tot < expect_len); + + ret = write(unixfd, "shut", 4); + assert(ret == 4); + + /* wait for hangup. Should have received one more byte of data. */ + ret = read(unixfd, tmp, sizeof(tmp)); + assert(ret == 6); + assert(strncmp(tmp, "closed", 6) == 0); + + sleep(1); + + iov.iov_len = 1; + ret = recvmsg(fd, &msg, 0); + if (ret < 0) + die_perror("recvmsg"); + assert(ret == 1); + + get_tcp_inq(&msg, &tcp_inq); + + /* tcp_inq should be 1 due to received fin. 
*/ + assert(tcp_inq == 1); + + iov.iov_len = 1; + ret = recvmsg(fd, &msg, 0); + if (ret < 0) + die_perror("recvmsg"); + + /* expect EOF */ + assert(ret == 0); + get_tcp_inq(&msg, &tcp_inq); + assert(tcp_inq == 1); + + close(fd); +} + +static int xaccept(int s) +{ + int fd = accept(s, NULL, 0); + + if (fd < 0) + die_perror("accept"); + + return fd; +} + +static int server(int unixfd) +{ + int fd = -1, r, on = 1; + + switch (pf) { + case AF_INET: + fd = sock_listen_mptcp("127.0.0.1", "15432"); + break; + case AF_INET6: + fd = sock_listen_mptcp("::1", "15432"); + break; + default: + xerror("Unknown pf %d\n", pf); + break; + } + + r = write(unixfd, "conn", 4); + assert(r == 4); + + alarm(15); + r = xaccept(fd); + + if (-1 == setsockopt(r, IPPROTO_TCP, TCP_INQ, &on, sizeof(on))) + die_perror("setsockopt"); + + process_one_client(r, unixfd); + + return 0; +} + +static int client(int unixfd) +{ + int fd = -1; + + alarm(15); + + switch (pf) { + case AF_INET: + fd = sock_connect_mptcp("127.0.0.1", "15432", proto_tx); + break; + case AF_INET6: + fd = sock_connect_mptcp("::1", "15432", proto_tx); + break; + default: + xerror("Unknown pf %d\n", pf); + } + + connect_one_server(fd, unixfd); + + return 0; +} + +static void init_rng(void) +{ + unsigned int foo; + + if (getrandom(&foo, sizeof(foo), 0) == -1) { + perror("getrandom"); + exit(1); + } + + srand(foo); +} + +static pid_t xfork(void) +{ + pid_t p = fork(); + + if (p < 0) + die_perror("fork"); + else if (p == 0) + init_rng(); + + return p; +} + +static int rcheck(int wstatus, const char *what) +{ + if (WIFEXITED(wstatus)) { + if (WEXITSTATUS(wstatus) == 0) + return 0; + fprintf(stderr, "%s exited, status=%d\n", what, WEXITSTATUS(wstatus)); + return WEXITSTATUS(wstatus); + } else if (WIFSIGNALED(wstatus)) { + xerror("%s killed by signal %d\n", what, WTERMSIG(wstatus)); + } else if (WIFSTOPPED(wstatus)) { + xerror("%s stopped by signal %d\n", what, WSTOPSIG(wstatus)); + } + + return 111; +} + +int main(int argc, char 
*argv[]) +{ + int e1, e2, wstatus; + pid_t s, c, ret; + int unixfds[2]; + + parse_opts(argc, argv); + + e1 = socketpair(AF_UNIX, SOCK_DGRAM, 0, unixfds); + if (e1 < 0) + die_perror("pipe"); + + s = xfork(); + if (s == 0) + return server(unixfds[1]); + + close(unixfds[1]); + + /* wait until server bound a socket */ + e1 = read(unixfds[0], &e1, 4); + assert(e1 == 4); + + c = xfork(); + if (c == 0) + return client(unixfds[0]); + + close(unixfds[0]); + + ret = waitpid(s, &wstatus, 0); + if (ret == -1) + die_perror("waitpid"); + e1 = rcheck(wstatus, "server"); + ret = waitpid(c, &wstatus, 0); + if (ret == -1) + die_perror("waitpid"); + e2 = rcheck(wstatus, "client"); + + return e1 ? e1 : e2; +} diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index dd42c2f692d0..2b66c5fa71eb 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -1,41 +1,110 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +# Double quotes to prevent globbing and word splitting is recommended in new +# code but we accept it, especially because there were too many before having +# address all other issues detected by shellcheck. +#shellcheck disable=SC2086 + +# ShellCheck incorrectly believes that most of the code here is unreachable +# because it's invoked by variable name, see how the "tests" array is used +#shellcheck disable=SC2317 + +. 
"$(dirname "${0}")/mptcp_lib.sh" + ret=0 sin="" +sinfail="" sout="" cin="" +cinfail="" +cinsent="" +tmpfile="" cout="" -ksft_skip=4 -timeout=30 -capture=0 +err="" +capout="" +ns1="" +ns2="" +iptables="iptables" +ip6tables="ip6tables" +timeout_poll=30 +timeout_test=$((timeout_poll * 2 + 1)) +capture=false +checksum=false +check_invert=0 +validate_checksum=false +init=0 +evts_ns1="" +evts_ns2="" +evts_ns1_pid=0 +evts_ns2_pid=0 +last_test_failed=0 +last_test_skipped=0 +last_test_ignored=1 + +declare -A all_tests +declare -a only_tests_ids +declare -a only_tests_names +declare -A failed_tests +MPTCP_LIB_TEST_FORMAT="%03u %s\n" +TEST_NAME="" +nr_blank=6 -TEST_COUNT=0 +# These var are used only in some tests, make sure they are not already set +unset FAILING_LINKS +unset test_linkfail +unset addr_nr_ns1 +unset addr_nr_ns2 +unset cestab_ns1 +unset cestab_ns2 +unset sflags +unset fastclose +unset fullmesh +unset speed -init() +# generated using "nfbpf_compile '(ip && (ip[54] & 0xf0) == 0x30) || +# (ip6 && (ip6[74] & 0xf0) == 0x30)'" +CBPF_MPTCP_SUBOPTION_ADD_ADDR="14, + 48 0 0 0, + 84 0 0 240, + 21 0 3 64, + 48 0 0 54, + 84 0 0 240, + 21 6 7 48, + 48 0 0 0, + 84 0 0 240, + 21 0 4 96, + 48 0 0 74, + 84 0 0 240, + 21 0 1 48, + 6 0 0 65535, + 6 0 0 0" + +init_partial() { capout=$(mktemp) - rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) - - ns1="ns1-$rndh" - ns2="ns2-$rndh" + mptcp_lib_ns_init ns1 ns2 - for netns in "$ns1" "$ns2";do - ip netns add $netns || exit $ksft_skip - ip -net $netns link set lo up - ip netns exec $netns sysctl -q net.mptcp.enabled=1 - ip netns exec $netns sysctl -q net.ipv4.conf.all.rp_filter=0 - ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0 + local netns + for netns in "$ns1" "$ns2"; do + ip netns exec $netns sysctl -q net.mptcp.pm_type=0 2>/dev/null || true + if $checksum; then + ip netns exec $netns sysctl -q net.mptcp.checksum_enabled=1 + fi done - # ns1 ns2 + check_invert=0 + validate_checksum=$checksum + + # ns1 ns2 # ns1eth1 
ns2eth1 # ns1eth2 ns2eth2 # ns1eth3 ns2eth3 # ns1eth4 ns2eth4 - for i in `seq 1 4`; do + local i + for i in $(seq 1 4); do ip link add ns1eth$i netns "$ns1" type veth peer name ns2eth$i netns "$ns2" ip -net "$ns1" addr add 10.0.$i.1/24 dev ns1eth$i ip -net "$ns1" addr add dead:beef:$i::1/64 dev ns1eth$i nodad @@ -47,6 +116,16 @@ init() # let $ns2 reach any $ns1 address from any interface ip -net "$ns2" route add default via 10.0.$i.1 dev ns2eth$i metric 10$i + ip -net "$ns2" route add default via dead:beef:$i::1 dev ns2eth$i metric 10$i + done +} + +init_shapers() +{ + local i + for i in $(seq 1 4); do + tc -n $ns1 qdisc add dev ns1eth$i root netem rate 20mbit delay 1ms + tc -n $ns2 qdisc add dev ns2eth$i root netem rate 20mbit delay 1ms done } @@ -54,131 +133,882 @@ cleanup_partial() { rm -f "$capout" - for netns in "$ns1" "$ns2"; do - ip netns del $netns - done + mptcp_lib_ns_exit "${ns1}" "${ns2}" +} + +init() { + init=1 + + mptcp_lib_check_mptcp + mptcp_lib_check_kallsyms + mptcp_lib_check_tools ip tc ss "${iptables}" "${ip6tables}" + + sin=$(mktemp) + sout=$(mktemp) + cin=$(mktemp) + cinsent=$(mktemp) + cout=$(mktemp) + err=$(mktemp) + evts_ns1=$(mktemp) + evts_ns2=$(mktemp) + + trap cleanup EXIT + + make_file "$cin" "client" 1 >/dev/null + make_file "$sin" "server" 1 >/dev/null } cleanup() { - rm -f "$cin" "$cout" - rm -f "$sin" "$sout" + rm -f "$cin" "$cout" "$sinfail" + rm -f "$sin" "$sout" "$cinsent" "$cinfail" + rm -f "$tmpfile" + rm -rf $evts_ns1 $evts_ns2 + rm -f "$err" cleanup_partial } +print_check() +{ + printf "%-${nr_blank}s%-36s" " " "${*}" +} + +print_info() +{ + # It can be empty, no need to print anything then + [ -z "${1}" ] && return + + mptcp_lib_print_info " Info: ${*}" +} + +print_ok() +{ + mptcp_lib_pr_ok "${@}" +} + +print_fail() +{ + mptcp_lib_pr_fail "${@}" +} + +print_skip() +{ + mptcp_lib_pr_skip "${@}" +} + +# [ $1: fail msg ] +mark_as_skipped() +{ + local msg="${1:-"Feature not supported"}" + + mptcp_lib_fail_if_expected_feature 
"${msg}" + + print_check "${msg}" + print_skip + + last_test_skipped=1 +} + +# $@: condition +continue_if() +{ + if ! "${@}"; then + mark_as_skipped + return 1 + fi +} + +skip_test() +{ + if [ "${#only_tests_ids[@]}" -eq 0 ] && [ "${#only_tests_names[@]}" -eq 0 ]; then + return 1 + fi + + local i + for i in "${only_tests_ids[@]}"; do + if [ "$((MPTCP_LIB_TEST_COUNTER+1))" -eq "${i}" ]; then + return 1 + fi + done + for i in "${only_tests_names[@]}"; do + if [ "${TEST_NAME}" = "${i}" ]; then + return 1 + fi + done + + return 0 +} + +append_prev_results() +{ + if [ ${last_test_failed} -eq 1 ]; then + mptcp_lib_result_fail "${TEST_NAME}" + elif [ ${last_test_skipped} -eq 1 ]; then + mptcp_lib_result_skip "${TEST_NAME}" + elif [ ${last_test_ignored} -ne 1 ]; then + mptcp_lib_result_pass "${TEST_NAME}" + fi + + last_test_failed=0 + last_test_skipped=0 + last_test_ignored=0 +} + +# $1: test name reset() { - cleanup_partial - init + append_prev_results + + TEST_NAME="${1}" + + MPTCP_LIB_SUBTEST_FLAKY=0 # reset if modified + + if skip_test; then + MPTCP_LIB_TEST_COUNTER=$((MPTCP_LIB_TEST_COUNTER+1)) + last_test_ignored=1 + return 1 + fi + + mptcp_lib_print_title "${TEST_NAME}" + + if [ "${init}" != "1" ]; then + init + else + cleanup_partial + fi + + init_partial + + return 0 } -for arg in "$@"; do - if [ "$arg" = "-c" ]; then - capture=1 +# $1: test name ; $2: counter to check +reset_check_counter() +{ + reset "${1}" || return 1 + + local counter="${2}" + + if ! nstat -asz "${counter}" | grep -wq "${counter}"; then + mark_as_skipped "counter '${counter}' is not available" + return 1 fi -done +} -ip -Version > /dev/null 2>&1 -if [ $? 
-ne 0 ];then - echo "SKIP: Could not run test without ip tool" - exit $ksft_skip -fi +# $1: test name +reset_with_cookies() +{ + reset "${1}" || return 1 + local netns + for netns in "$ns1" "$ns2"; do + ip netns exec $netns sysctl -q net.ipv4.tcp_syncookies=2 + done +} -check_transfer() +# $1: test name +reset_with_add_addr_timeout() { - in=$1 - out=$2 - what=$3 + local ip="${2:-4}" + local tables + + reset "${1}" || return 1 + + tables="${iptables}" + if [ $ip -eq 6 ]; then + tables="${ip6tables}" + fi - cmp "$in" "$out" > /dev/null 2>&1 - if [ $? -ne 0 ] ;then - echo "[ FAIL ] $what does not match (in, out):" - print_file_err "$in" - print_file_err "$out" + ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1 + if ! ip netns exec $ns2 $tables -A OUTPUT -p tcp \ + -m tcp --tcp-option 30 \ + -m bpf --bytecode \ + "$CBPF_MPTCP_SUBOPTION_ADD_ADDR" \ + -j DROP; then + mark_as_skipped "unable to set the 'add addr' rule" return 1 fi +} + +# $1: test name +reset_with_checksum() +{ + local ns1_enable=$1 + local ns2_enable=$2 + + reset "checksum test ${1} ${2}" || return 1 + + ip netns exec $ns1 sysctl -q net.mptcp.checksum_enabled=$ns1_enable + ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=$ns2_enable + + validate_checksum=true +} + +reset_with_allow_join_id0() +{ + local ns1_enable=$2 + local ns2_enable=$3 + + reset "${1}" || return 1 + + ip netns exec $ns1 sysctl -q net.mptcp.allow_join_initial_addr_port=$ns1_enable + ip netns exec $ns2 sysctl -q net.mptcp.allow_join_initial_addr_port=$ns2_enable +} + +# Modify TCP payload without corrupting the TCP packet +# +# This rule inverts a 8-bit word at byte offset 148 for the 2nd TCP ACK packets +# carrying enough data. +# Once it is done, the TCP Checksum field is updated so the packet is still +# considered as valid at the TCP level. 
+# Because the MPTCP checksum, covering the TCP options and data, has not been +# updated, the modification will be detected and an MP_FAIL will be emitted: +# what we want to validate here without corrupting "random" MPTCP options. +# +# To avoid having tc producing this pr_info() message for each TCP ACK packets +# not carrying enough data: +# +# tc action pedit offset 162 out of bounds +# +# Netfilter is used to mark packets with enough data. +setup_fail_rules() +{ + check_invert=1 + validate_checksum=true + local i="$1" + local ip="${2:-4}" + local tables + + tables="${iptables}" + if [ $ip -eq 6 ]; then + tables="${ip6tables}" + fi + + ip netns exec $ns2 $tables \ + -t mangle \ + -A OUTPUT \ + -o ns2eth$i \ + -p tcp \ + -m length --length 150:9999 \ + -m statistic --mode nth --packet 1 --every 99999 \ + -j MARK --set-mark 42 || return ${KSFT_SKIP} + + tc -n $ns2 qdisc add dev ns2eth$i clsact || return ${KSFT_SKIP} + tc -n $ns2 filter add dev ns2eth$i egress \ + protocol ip prio 1000 \ + handle 42 fw \ + action pedit munge offset 148 u8 invert \ + pipe csum tcp \ + index 100 || return ${KSFT_SKIP} +} + +reset_with_fail() +{ + reset_check_counter "${1}" "MPTcpExtInfiniteMapTx" || return 1 + shift + + ip netns exec $ns1 sysctl -q net.mptcp.checksum_enabled=1 + ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=1 + + local rc=0 + setup_fail_rules "${@}" || rc=$? + + if [ ${rc} -eq ${KSFT_SKIP} ]; then + mark_as_skipped "unable to set the 'fail' rules" + return 1 + fi +} + +reset_with_events() +{ + reset "${1}" || return 1 + + mptcp_lib_events "${ns1}" "${evts_ns1}" evts_ns1_pid + mptcp_lib_events "${ns2}" "${evts_ns2}" evts_ns2_pid +} + +reset_with_tcp_filter() +{ + reset "${1}" || return 1 + shift + + local ns="${!1}" + local src="${2}" + local target="${3}" + + if ! 
ip netns exec "${ns}" ${iptables} \ + -A INPUT \ + -s "${src}" \ + -p tcp \ + -j "${target}"; then + mark_as_skipped "unable to set the filter rules" + return 1 + fi +} + +# $1: err msg +fail_test() +{ + if ! mptcp_lib_subtest_is_flaky; then + ret=${KSFT_FAIL} + fi + + if [ ${#} -gt 0 ]; then + print_fail "${@}" + fi + + # just in case a test is marked twice as failed + if [ ${last_test_failed} -eq 0 ]; then + failed_tests[${MPTCP_LIB_TEST_COUNTER}]="${TEST_NAME}" + dump_stats + last_test_failed=1 + fi +} + +get_failed_tests_ids() +{ + # sorted + local i + for i in "${!failed_tests[@]}"; do + echo "${i}" + done | sort -n +} + +check_transfer() +{ + local in=$1 + local out=$2 + local what=$3 + local bytes=$4 + local i a b + + local line + if [ -n "$bytes" ]; then + local out_size + # when truncating we must check the size explicitly + out_size=$(wc -c $out | awk '{print $1}') + if [ $out_size -ne $bytes ]; then + fail_test "$what output file has wrong size ($out_size, $bytes)" + return 1 + fi + + # note: BusyBox's "cmp" command doesn't support --bytes + tmpfile=$(mktemp) + head --bytes="$bytes" "$in" > "$tmpfile" + mv "$tmpfile" "$in" + head --bytes="$bytes" "$out" > "$tmpfile" + mv "$tmpfile" "$out" + tmpfile="" + fi + cmp -l "$in" "$out" | while read -r i a b; do + local sum=$((0${a} + 0${b})) + if [ $check_invert -eq 0 ] || [ $sum -ne $((0xff)) ]; then + fail_test "$what does not match (in, out):" + mptcp_lib_print_file_err "$in" + mptcp_lib_print_file_err "$out" + + return 1 + else + print_info "$what has inverted byte at ${i}" + fi + done return 0 } do_ping() { - listener_ns="$1" - connector_ns="$2" - connect_addr="$3" + local listener_ns="$1" + local connector_ns="$2" + local connect_addr="$3" + + if ! 
ip netns exec ${connector_ns} ping -q -c 1 $connect_addr >/dev/null; then + fail_test "$listener_ns -> $connect_addr connectivity" + fi +} + +link_failure() +{ + local ns="$1" + + if [ -z "$FAILING_LINKS" ]; then + l=$((RANDOM%4)) + FAILING_LINKS=$((l+1)) + fi + + local l + for l in $FAILING_LINKS; do + local veth="ns1eth$l" + ip -net "$ns" link set "$veth" down + done +} + +rm_addr_count() +{ + mptcp_lib_get_counter "${1}" "MPTcpExtRmAddr" +} + +# $1: ns, $2: old rm_addr counter in $ns +wait_rm_addr() +{ + local ns="${1}" + local old_cnt="${2}" + local cnt + + local i + for i in $(seq 10); do + cnt=$(rm_addr_count ${ns}) + [ "$cnt" = "${old_cnt}" ] || break + sleep 0.1 + done +} + +rm_sf_count() +{ + mptcp_lib_get_counter "${1}" "MPTcpExtRmSubflow" +} + +# $1: ns, $2: old rm_sf counter in $ns +wait_rm_sf() +{ + local ns="${1}" + local old_cnt="${2}" + local cnt + + local i + for i in $(seq 10); do + cnt=$(rm_sf_count ${ns}) + [ "$cnt" = "${old_cnt}" ] || break + sleep 0.1 + done +} + +wait_mpj() +{ + local ns="${1}" + local cnt old_cnt + + old_cnt=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPJoinAckRx") + + local i + for i in $(seq 10); do + cnt=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPJoinAckRx") + [ "$cnt" = "${old_cnt}" ] || break + sleep 0.1 + done +} + +kill_events_pids() +{ + mptcp_lib_kill_wait $evts_ns1_pid + evts_ns1_pid=0 + mptcp_lib_kill_wait $evts_ns2_pid + evts_ns2_pid=0 +} + +pm_nl_set_limits() +{ + mptcp_lib_pm_nl_set_limits "${@}" +} + +pm_nl_add_endpoint() +{ + mptcp_lib_pm_nl_add_endpoint "${@}" +} + +pm_nl_del_endpoint() +{ + mptcp_lib_pm_nl_del_endpoint "${@}" +} + +pm_nl_flush_endpoint() +{ + mptcp_lib_pm_nl_flush_endpoint "${@}" +} + +pm_nl_show_endpoints() +{ + mptcp_lib_pm_nl_show_endpoints "${@}" +} + +pm_nl_change_endpoint() +{ + mptcp_lib_pm_nl_change_endpoint "${@}" +} + +pm_nl_check_endpoint() +{ + local msg="$1" + local ns=$2 + local addr=$3 + local flags dev id port + + print_check "${msg}" + + shift 3 + while [ -n "$1" ]; do + case 
"${1}" in + "flags" | "dev" | "id" | "port") + eval "${1}"="${2}" + shift + ;; + *) + ;; + esac - ip netns exec ${connector_ns} ping -q -c 1 $connect_addr >/dev/null - if [ $? -ne 0 ] ; then - echo "$listener_ns -> $connect_addr connectivity [ FAIL ]" 1>&2 - ret=1 + shift + done + + if [ -z "${id}" ]; then + test_fail "bad test - missing endpoint id" + return + fi + + check_output "mptcp_lib_pm_nl_get_endpoint ${ns} ${id}" \ + "$(mptcp_lib_pm_nl_format_endpoints \ + "${id},${addr},${flags//","/" "},${dev},${port}")" +} + +pm_nl_set_endpoint() +{ + local listener_ns="$1" + local connector_ns="$2" + local connect_addr="$3" + + local addr_nr_ns1=${addr_nr_ns1:-0} + local addr_nr_ns2=${addr_nr_ns2:-0} + local sflags=${sflags:-""} + local fullmesh=${fullmesh:-""} + + local flags="subflow" + if [ -n "${fullmesh}" ]; then + flags="${flags},fullmesh" + addr_nr_ns2=${fullmesh} + fi + + # let the mptcp subflow be established in background before + # do endpoint manipulation + if [ $addr_nr_ns1 != "0" ] || [ $addr_nr_ns2 != "0" ]; then + sleep 1 + fi + + if [ $addr_nr_ns1 -gt 0 ]; then + local counter=2 + local add_nr_ns1=${addr_nr_ns1} + local id=10 + while [ $add_nr_ns1 -gt 0 ]; do + local addr + if mptcp_lib_is_v6 "${connect_addr}"; then + addr="dead:beef:$counter::1" + else + addr="10.0.$counter.1" + fi + pm_nl_add_endpoint $ns1 $addr flags signal + counter=$((counter + 1)) + add_nr_ns1=$((add_nr_ns1 - 1)) + id=$((id + 1)) + done + elif [ $addr_nr_ns1 -lt 0 ]; then + local rm_nr_ns1=$((-addr_nr_ns1)) + if [ $rm_nr_ns1 -lt 8 ]; then + local counter=0 + local line + pm_nl_show_endpoints ${listener_ns} | while read -r line; do + # shellcheck disable=SC2206 # we do want to split per word + local arr=($line) + local nr=0 + + local i + for i in "${arr[@]}"; do + if [ $i = "id" ]; then + if [ $counter -eq $rm_nr_ns1 ]; then + break + fi + id=${arr[$nr+1]} + rm_addr=$(rm_addr_count ${connector_ns}) + pm_nl_del_endpoint ${listener_ns} $id + wait_rm_addr ${connector_ns} ${rm_addr} 
+ counter=$((counter + 1)) + fi + nr=$((nr + 1)) + done + done + elif [ $rm_nr_ns1 -eq 8 ]; then + pm_nl_flush_endpoint ${listener_ns} + elif [ $rm_nr_ns1 -eq 9 ]; then + pm_nl_del_endpoint ${listener_ns} 0 ${connect_addr} + fi + fi + + # if newly added endpoints must be deleted, give the background msk + # some time to created them + [ $addr_nr_ns1 -gt 0 ] && [ $addr_nr_ns2 -lt 0 ] && sleep 1 + + if [ $addr_nr_ns2 -gt 0 ]; then + local add_nr_ns2=${addr_nr_ns2} + local counter=3 + local id=20 + while [ $add_nr_ns2 -gt 0 ]; do + local addr + if mptcp_lib_is_v6 "${connect_addr}"; then + addr="dead:beef:$counter::2" + else + addr="10.0.$counter.2" + fi + pm_nl_add_endpoint $ns2 $addr flags $flags + counter=$((counter + 1)) + add_nr_ns2=$((add_nr_ns2 - 1)) + id=$((id + 1)) + done + elif [ $addr_nr_ns2 -lt 0 ]; then + local rm_nr_ns2=$((-addr_nr_ns2)) + if [ $rm_nr_ns2 -lt 8 ]; then + local counter=0 + local line + pm_nl_show_endpoints ${connector_ns} | while read -r line; do + # shellcheck disable=SC2206 # we do want to split per word + local arr=($line) + local nr=0 + + local i + for i in "${arr[@]}"; do + if [ $i = "id" ]; then + if [ $counter -eq $rm_nr_ns2 ]; then + break + fi + local id rm_addr + # rm_addr are serialized, allow the previous one to + # complete + id=${arr[$nr+1]} + rm_addr=$(rm_addr_count ${listener_ns}) + pm_nl_del_endpoint ${connector_ns} $id + wait_rm_addr ${listener_ns} ${rm_addr} + counter=$((counter + 1)) + fi + nr=$((nr + 1)) + done + done + elif [ $rm_nr_ns2 -eq 8 ]; then + pm_nl_flush_endpoint ${connector_ns} + elif [ $rm_nr_ns2 -eq 9 ]; then + local addr + if mptcp_lib_is_v6 "${connect_addr}"; then + addr="dead:beef:1::2" + else + addr="10.0.1.2" + fi + pm_nl_del_endpoint ${connector_ns} 0 $addr + fi + fi + + if [ -n "${sflags}" ]; then + sleep 1 + + local netns + for netns in "$ns1" "$ns2"; do + local line + pm_nl_show_endpoints $netns | while read -r line; do + # shellcheck disable=SC2206 # we do want to split per word + local 
arr=($line) + local nr=0 + local id + + local i + for i in "${arr[@]}"; do + if [ $i = "id" ]; then + id=${arr[$nr+1]} + fi + nr=$((nr + 1)) + done + pm_nl_change_endpoint $netns $id $sflags + done + done + fi +} + +chk_cestab_nr() +{ + local ns=$1 + local cestab=$2 + local count + + print_check "cestab $cestab" + count=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPCurrEstab") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$cestab" ]; then + fail_test "got $count current establish[s] expected $cestab" + else + print_ok + fi +} + +# $1 namespace 1, $2 namespace 2 +check_cestab() +{ + if [ -n "${cestab_ns1}" ]; then + chk_cestab_nr ${1} ${cestab_ns1} + fi + if [ -n "${cestab_ns2}" ]; then + chk_cestab_nr ${2} ${cestab_ns2} fi } do_transfer() { - listener_ns="$1" - connector_ns="$2" - cl_proto="$3" - srv_proto="$4" - connect_addr="$5" + local listener_ns="$1" + local connector_ns="$2" + local cl_proto="$3" + local srv_proto="$4" + local connect_addr="$5" - port=$((10000+$TEST_COUNT)) - TEST_COUNT=$((TEST_COUNT+1)) + local port=$((10000 + MPTCP_LIB_TEST_COUNTER - 1)) + local cappid + local FAILING_LINKS=${FAILING_LINKS:-""} + local fastclose=${fastclose:-""} + local speed=${speed:-"fast"} :> "$cout" :> "$sout" :> "$capout" - if [ $capture -eq 1 ]; then + if $capture; then + local capuser if [ -z $SUDO_USER ] ; then capuser="" else capuser="-Z $SUDO_USER" fi - capfile="mp_join-${listener_ns}.pcap" + capfile=$(printf "mp_join-%02u-%s.pcap" "$MPTCP_LIB_TEST_COUNTER" "${listener_ns}") - echo "Capturing traffic for test $TEST_COUNT into $capfile" + echo "Capturing traffic for test $MPTCP_LIB_TEST_COUNTER into $capfile" ip netns exec ${listener_ns} tcpdump -i any -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 & cappid=$! sleep 1 fi - ip netns exec ${listener_ns} ./mptcp_connect -j -t $timeout -l -p $port -s ${srv_proto} 0.0.0.0 < "$sin" > "$sout" & - spid=$! 
+ NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ + nstat -n + NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ + nstat -n - sleep 1 + local extra_args + if [ $speed = "fast" ]; then + extra_args="-j" + elif [ $speed = "slow" ]; then + extra_args="-r 50" + elif [ $speed -gt 0 ]; then + extra_args="-r ${speed}" + fi + + local extra_cl_args="" + local extra_srv_args="" + local trunc_size="" + if [ -n "${fastclose}" ]; then + if [ ${test_linkfail} -le 1 ]; then + fail_test "fastclose tests need test_linkfail argument" + return 1 + fi + + # disconnect + trunc_size=${test_linkfail} + local side=${fastclose} + + if [ ${side} = "client" ]; then + extra_cl_args="-f ${test_linkfail}" + extra_srv_args="-f -1" + elif [ ${side} = "server" ]; then + extra_srv_args="-f ${test_linkfail}" + extra_cl_args="-f -1" + else + fail_test "wrong/unknown fastclose spec ${side}" + return 1 + fi + fi + + extra_srv_args="$extra_args $extra_srv_args" + if [ "$test_linkfail" -gt 1 ];then + timeout ${timeout_test} \ + ip netns exec ${listener_ns} \ + ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ + $extra_srv_args "::" < "$sinfail" > "$sout" & + else + timeout ${timeout_test} \ + ip netns exec ${listener_ns} \ + ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ + $extra_srv_args "::" < "$sin" > "$sout" & + fi + local spid=$! - ip netns exec ${connector_ns} ./mptcp_connect -j -t $timeout -p $port -s ${cl_proto} $connect_addr < "$cin" > "$cout" & - cpid=$! 
+ mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}" + + extra_cl_args="$extra_args $extra_cl_args" + if [ "$test_linkfail" -eq 0 ];then + timeout ${timeout_test} \ + ip netns exec ${connector_ns} \ + ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ + $extra_cl_args $connect_addr < "$cin" > "$cout" & + elif [ "$test_linkfail" -eq 1 ] || [ "$test_linkfail" -eq 2 ];then + ( cat "$cinfail" ; sleep 2; link_failure $listener_ns ; cat "$cinfail" ) | \ + tee "$cinsent" | \ + timeout ${timeout_test} \ + ip netns exec ${connector_ns} \ + ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ + $extra_cl_args $connect_addr > "$cout" & + else + tee "$cinsent" < "$cinfail" | \ + timeout ${timeout_test} \ + ip netns exec ${connector_ns} \ + ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ + $extra_cl_args $connect_addr > "$cout" & + fi + local cpid=$! + + pm_nl_set_endpoint $listener_ns $connector_ns $connect_addr + check_cestab $listener_ns $connector_ns wait $cpid - retc=$? + local retc=$? wait $spid - rets=$? + local rets=$? 
- if [ $capture -eq 1 ]; then + if $capture; then sleep 1 kill $cappid fi + NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ + nstat | grep Tcp > /tmp/${listener_ns}.out + NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ + nstat | grep Tcp > /tmp/${connector_ns}.out + if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then - echo " client exit code $retc, server $rets" 1>&2 - echo "\nnetns ${listener_ns} socket stat for $port:" 1>&2 - ip netns exec ${listener_ns} ss -nita 1>&2 -o "sport = :$port" - echo "\nnetns ${connector_ns} socket stat for $port:" 1>&2 - ip netns exec ${connector_ns} ss -nita 1>&2 -o "dport = :$port" + fail_test "client exit code $retc, server $rets" + echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2 + ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port" + cat /tmp/${listener_ns}.out + echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2 + ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port" + cat /tmp/${connector_ns}.out cat "$capout" return 1 fi - check_transfer $sin $cout "file received by client" + if [ "$test_linkfail" -gt 1 ];then + check_transfer $sinfail $cout "file received by client" $trunc_size + else + check_transfer $sin $cout "file received by client" $trunc_size + fi retc=$? - check_transfer $cin $sout "file received by server" + if [ "$test_linkfail" -eq 0 ];then + check_transfer $cin $sout "file received by server" $trunc_size + else + check_transfer $cinsent $sout "file received by server" $trunc_size + fi rets=$? 
if [ $retc -eq 0 ] && [ $rets -eq 0 ];then @@ -192,166 +1022,2624 @@ do_transfer() make_file() { - name=$1 - who=$2 - - SIZE=1 + local name=$1 + local who=$2 + local size=$3 - dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null - echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name" + mptcp_lib_make_file $name 1024 $size - echo "Created $name (size $SIZE KB) containing data sent by $who" + print_info "Test file (size $size KB) for $who" } run_tests() { - listener_ns="$1" - connector_ns="$2" - connect_addr="$3" - lret=0 + local listener_ns="$1" + local connector_ns="$2" + local connect_addr="$3" + + local size + local test_linkfail=${test_linkfail:-0} + + # The values above 2 are reused to make test files + # with the given sizes (KB) + if [ "$test_linkfail" -gt 2 ]; then + size=$test_linkfail + + if [ -z "$cinfail" ]; then + cinfail=$(mktemp) + fi + make_file "$cinfail" "client" $size + # create the input file for the failure test when + # the first failure test run + elif [ "$test_linkfail" -ne 0 ] && [ -z "$cinfail" ]; then + # the client file must be considerably larger + # of the maximum expected cwin value, or the + # link utilization will be not predicable + size=$((RANDOM%2)) + size=$((size+1)) + size=$((size*8192)) + size=$((size + ( RANDOM % 8192) )) + + cinfail=$(mktemp) + make_file "$cinfail" "client" $size + fi + + if [ "$test_linkfail" -gt 2 ]; then + size=$test_linkfail + + if [ -z "$sinfail" ]; then + sinfail=$(mktemp) + fi + make_file "$sinfail" "server" $size + elif [ "$test_linkfail" -eq 2 ] && [ -z "$sinfail" ]; then + size=$((RANDOM%16)) + size=$((size+1)) + size=$((size*2048)) + + sinfail=$(mktemp) + make_file "$sinfail" "server" $size + fi do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} - lret=$? 
- if [ $lret -ne 0 ]; then - ret=$lret - return +} + +dump_stats() +{ + echo Server ns stats + ip netns exec $ns1 nstat -as | grep Tcp + echo Client ns stats + ip netns exec $ns2 nstat -as | grep Tcp +} + +chk_csum_nr() +{ + local csum_ns1=${1:-0} + local csum_ns2=${2:-0} + local count + local extra_msg="" + local allow_multi_errors_ns1=0 + local allow_multi_errors_ns2=0 + + if [[ "${csum_ns1}" = "+"* ]]; then + allow_multi_errors_ns1=1 + csum_ns1=${csum_ns1:1} + fi + if [[ "${csum_ns2}" = "+"* ]]; then + allow_multi_errors_ns2=1 + csum_ns2=${csum_ns2:1} + fi + + print_check "sum" + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtDataCsumErr") + if [ "$count" != "$csum_ns1" ]; then + extra_msg+=" ns1=$count" + fi + if [ -z "$count" ]; then + print_skip + elif { [ "$count" != $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 0 ]; } || + { [ "$count" -lt $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 1 ]; }; then + fail_test "got $count data checksum error[s] expected $csum_ns1" + else + print_ok + fi + print_check "csum" + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtDataCsumErr") + if [ "$count" != "$csum_ns2" ]; then + extra_msg+=" ns2=$count" + fi + if [ -z "$count" ]; then + print_skip + elif { [ "$count" != $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 0 ]; } || + { [ "$count" -lt $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 1 ]; }; then + fail_test "got $count data checksum error[s] expected $csum_ns2" + else + print_ok + fi + + print_info "$extra_msg" +} + +chk_fail_nr() +{ + local fail_tx=$1 + local fail_rx=$2 + local ns_invert=${3:-""} + local count + local ns_tx=$ns1 + local ns_rx=$ns2 + local extra_msg="" + local allow_tx_lost=0 + local allow_rx_lost=0 + + if [[ $ns_invert = "invert" ]]; then + ns_tx=$ns2 + ns_rx=$ns1 + extra_msg="invert" + fi + + if [[ "${fail_tx}" = "-"* ]]; then + allow_tx_lost=1 + fail_tx=${fail_tx:1} + fi + if [[ "${fail_rx}" = "-"* ]]; then + allow_rx_lost=1 + fail_rx=${fail_rx:1} + fi + + print_check "ftx" + count=$(mptcp_lib_get_counter 
${ns_tx} "MPTcpExtMPFailTx") + if [ "$count" != "$fail_tx" ]; then + extra_msg+=",tx=$count" + fi + if [ -z "$count" ]; then + print_skip + elif { [ "$count" != "$fail_tx" ] && [ $allow_tx_lost -eq 0 ]; } || + { [ "$count" -gt "$fail_tx" ] && [ $allow_tx_lost -eq 1 ]; }; then + fail_test "got $count MP_FAIL[s] TX expected $fail_tx" + else + print_ok + fi + + print_check "failrx" + count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPFailRx") + if [ "$count" != "$fail_rx" ]; then + extra_msg+=",rx=$count" + fi + if [ -z "$count" ]; then + print_skip + elif { [ "$count" != "$fail_rx" ] && [ $allow_rx_lost -eq 0 ]; } || + { [ "$count" -gt "$fail_rx" ] && [ $allow_rx_lost -eq 1 ]; }; then + fail_test "got $count MP_FAIL[s] RX expected $fail_rx" + else + print_ok + fi + + print_info "$extra_msg" +} + +chk_fclose_nr() +{ + local fclose_tx=$1 + local fclose_rx=$2 + local ns_invert=$3 + local count + local ns_tx=$ns2 + local ns_rx=$ns1 + local extra_msg="" + + if [[ $ns_invert = "invert" ]]; then + ns_tx=$ns1 + ns_rx=$ns2 + extra_msg="invert" + fi + + print_check "ctx" + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPFastcloseTx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$fclose_tx" ]; then + extra_msg+=",tx=$count" + fail_test "got $count MP_FASTCLOSE[s] TX expected $fclose_tx" + else + print_ok + fi + + print_check "fclzrx" + count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPFastcloseRx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$fclose_rx" ]; then + extra_msg+=",rx=$count" + fail_test "got $count MP_FASTCLOSE[s] RX expected $fclose_rx" + else + print_ok + fi + + print_info "$extra_msg" +} + +chk_rst_nr() +{ + local rst_tx=$1 + local rst_rx=$2 + local ns_invert=${3:-""} + local count + local ns_tx=$ns1 + local ns_rx=$ns2 + local extra_msg="" + + if [[ $ns_invert = "invert" ]]; then + ns_tx=$ns2 + ns_rx=$ns1 + extra_msg="invert" + fi + + print_check "rtx" + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPRstTx") + if [ -z 
"$count" ]; then + print_skip + # accept more rst than expected except if we don't expect any + elif { [ $rst_tx -ne 0 ] && [ $count -lt $rst_tx ]; } || + { [ $rst_tx -eq 0 ] && [ $count -ne 0 ]; }; then + fail_test "got $count MP_RST[s] TX expected $rst_tx" + else + print_ok + fi + + print_check "rstrx" + count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPRstRx") + if [ -z "$count" ]; then + print_skip + # accept more rst than expected except if we don't expect any + elif { [ $rst_rx -ne 0 ] && [ $count -lt $rst_rx ]; } || + { [ $rst_rx -eq 0 ] && [ $count -ne 0 ]; }; then + fail_test "got $count MP_RST[s] RX expected $rst_rx" + else + print_ok + fi + + print_info "$extra_msg" +} + +chk_infi_nr() +{ + local infi_tx=$1 + local infi_rx=$2 + local count + + print_check "itx" + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtInfiniteMapTx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$infi_tx" ]; then + fail_test "got $count infinite map[s] TX expected $infi_tx" + else + print_ok + fi + + print_check "infirx" + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtInfiniteMapRx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$infi_rx" ]; then + fail_test "got $count infinite map[s] RX expected $infi_rx" + else + print_ok fi } chk_join_nr() { - local msg="$1" - local syn_nr=$2 - local syn_ack_nr=$3 - local ack_nr=$4 + local syn_nr=$1 + local syn_ack_nr=$2 + local ack_nr=$3 + local csum_ns1=${4:-0} + local csum_ns2=${5:-0} + local fail_nr=${6:-0} + local rst_nr=${7:-0} + local infi_nr=${8:-0} + local corrupted_pkts=${9:-0} local count + local with_cookie + + if [ "${corrupted_pkts}" -gt 0 ]; then + print_info "${corrupted_pkts} corrupted pkts" + fi + + print_check "syn" + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinSynRx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$syn_nr" ]; then + fail_test "got $count JOIN[s] syn expected $syn_nr" + else + print_ok + fi + + print_check "synack" + with_cookie=$(ip netns exec $ns2 
sysctl -n net.ipv4.tcp_syncookies) + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynAckRx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$syn_ack_nr" ]; then + # simult connections exceeding the limit with cookie enabled could go up to + # synack validation as the conn limit can be enforced reliably only after + # the subflow creation + if [ "$with_cookie" = 2 ] && [ "$count" -gt "$syn_ack_nr" ] && [ "$count" -le "$syn_nr" ]; then + print_ok + else + fail_test "got $count JOIN[s] synack expected $syn_ack_nr" + fi + else + print_ok + fi + + print_check "ack" + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckRx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$ack_nr" ]; then + fail_test "got $count JOIN[s] ack expected $ack_nr" + else + print_ok + fi + if $validate_checksum; then + chk_csum_nr $csum_ns1 $csum_ns2 + chk_fail_nr $fail_nr $fail_nr + chk_rst_nr $rst_nr $rst_nr + chk_infi_nr $infi_nr $infi_nr + fi +} + +# a negative value for 'stale_max' means no upper bound: +# for bidirectional transfer, if one peer sleep for a while +# - as these tests do - we can have a quite high number of +# stale/recover conversions, proportional to +# sleep duration/ MPTCP-level RTX interval. 
+chk_stale_nr() +{ + local ns=$1 + local stale_min=$2 + local stale_max=$3 + local stale_delta=$4 local dump_stats + local stale_nr + local recover_nr + + print_check "stale" - printf "%-36s %s" "$msg" "syn" - count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinSynRx | awk '{print $2}'` - [ -z "$count" ] && count=0 - if [ "$count" != "$syn_nr" ]; then - echo "[fail] got $count JOIN[s] syn expected $syn_nr" - ret=1 + stale_nr=$(mptcp_lib_get_counter ${ns} "MPTcpExtSubflowStale") + recover_nr=$(mptcp_lib_get_counter ${ns} "MPTcpExtSubflowRecover") + if [ -z "$stale_nr" ] || [ -z "$recover_nr" ]; then + print_skip + elif [ $stale_nr -lt $stale_min ] || + { [ $stale_max -gt 0 ] && [ $stale_nr -gt $stale_max ]; } || + [ $((stale_nr - recover_nr)) -ne $stale_delta ]; then + fail_test "got $stale_nr stale[s] $recover_nr recover[s], " \ + " expected stale in range [$stale_min..$stale_max]," \ + " stale-recover delta $stale_delta" dump_stats=1 else - echo -n "[ ok ]" + print_ok fi - echo -n " - synack" - count=`ip netns exec $ns2 nstat -as | grep MPTcpExtMPJoinSynAckRx | awk '{print $2}'` - [ -z "$count" ] && count=0 - if [ "$count" != "$syn_ack_nr" ]; then - echo "[fail] got $count JOIN[s] synack expected $syn_ack_nr" - ret=1 - dump_stats=1 + if [ "${dump_stats}" = 1 ]; then + echo $ns stats + ip netns exec $ns ip -s link show + ip netns exec $ns nstat -as | grep MPTcp + fi +} + +chk_add_nr() +{ + local add_nr=$1 + local echo_nr=$2 + local port_nr=${3:-0} + local syn_nr=${4:-$port_nr} + local syn_ack_nr=${5:-$port_nr} + local ack_nr=${6:-$port_nr} + local mis_syn_nr=${7:-0} + local mis_ack_nr=${8:-0} + local count + local timeout + + timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout) + + print_check "add" + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtAddAddr") + if [ -z "$count" ]; then + print_skip + # if the test configured a short timeout tolerate greater then expected + # add addrs options, due to retransmissions + elif [ "$count" != "$add_nr" ] 
&& { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_nr" ]; }; then + fail_test "got $count ADD_ADDR[s] expected $add_nr" + else + print_ok + fi + + print_check "echo" + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtEchoAdd") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$echo_nr" ]; then + fail_test "got $count ADD_ADDR echo[s] expected $echo_nr" + else + print_ok + fi + + if [ $port_nr -gt 0 ]; then + print_check "pt" + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtPortAdd") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$port_nr" ]; then + fail_test "got $count ADD_ADDR[s] with a port-number expected $port_nr" + else + print_ok + fi + + print_check "syn" + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinPortSynRx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$syn_nr" ]; then + fail_test "got $count JOIN[s] syn with a different \ + port-number expected $syn_nr" + else + print_ok + fi + + print_check "synack" + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinPortSynAckRx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$syn_ack_nr" ]; then + fail_test "got $count JOIN[s] synack with a different \ + port-number expected $syn_ack_nr" + else + print_ok + fi + + print_check "ack" + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinPortAckRx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$ack_nr" ]; then + fail_test "got $count JOIN[s] ack with a different \ + port-number expected $ack_nr" + else + print_ok + fi + + print_check "syn" + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMismatchPortSynRx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$mis_syn_nr" ]; then + fail_test "got $count JOIN[s] syn with a mismatched \ + port-number expected $mis_syn_nr" + else + print_ok + fi + + print_check "ack" + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMismatchPortAckRx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$mis_ack_nr" ]; then + fail_test "got 
$count JOIN[s] ack with a mismatched \ + port-number expected $mis_ack_nr" + else + print_ok + fi + fi +} + +chk_add_tx_nr() +{ + local add_tx_nr=$1 + local echo_tx_nr=$2 + local timeout + local count + + timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout) + + print_check "add TX" + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtAddAddrTx") + if [ -z "$count" ]; then + print_skip + # if the test configured a short timeout tolerate greater then expected + # add addrs options, due to retransmissions + elif [ "$count" != "$add_tx_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_tx_nr" ]; }; then + fail_test "got $count ADD_ADDR[s] TX, expected $add_tx_nr" + else + print_ok + fi + + print_check "echo TX" + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtEchoAddTx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$echo_tx_nr" ]; then + fail_test "got $count ADD_ADDR echo[s] TX, expected $echo_tx_nr" + else + print_ok + fi +} + +chk_rm_nr() +{ + local rm_addr_nr=$1 + local rm_subflow_nr=$2 + local invert + local simult + local count + local addr_ns=$ns1 + local subflow_ns=$ns2 + local extra_msg="" + + shift 2 + while [ -n "$1" ]; do + [ "$1" = "invert" ] && invert=true + [ "$1" = "simult" ] && simult=true + shift + done + + if [ -z $invert ]; then + addr_ns=$ns1 + subflow_ns=$ns2 + elif [ $invert = "true" ]; then + addr_ns=$ns2 + subflow_ns=$ns1 + extra_msg="invert" + fi + + print_check "rm" + count=$(mptcp_lib_get_counter ${addr_ns} "MPTcpExtRmAddr") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$rm_addr_nr" ]; then + fail_test "got $count RM_ADDR[s] expected $rm_addr_nr" + else + print_ok + fi + + print_check "rmsf" + count=$(mptcp_lib_get_counter ${subflow_ns} "MPTcpExtRmSubflow") + if [ -z "$count" ]; then + print_skip + elif [ -n "$simult" ]; then + local cnt suffix + + cnt=$(mptcp_lib_get_counter ${addr_ns} "MPTcpExtRmSubflow") + + # in case of simult flush, the subflow removal count on each side is + # unreliable + 
count=$((count + cnt)) + if [ "$count" != "$rm_subflow_nr" ]; then + suffix="$count in [$rm_subflow_nr:$((rm_subflow_nr*2))]" + extra_msg+=" simult" + fi + if [ $count -ge "$rm_subflow_nr" ] && \ + [ "$count" -le "$((rm_subflow_nr *2 ))" ]; then + print_ok "$suffix" + else + fail_test "got $count RM_SUBFLOW[s] expected in range [$rm_subflow_nr:$((rm_subflow_nr*2))]" + fi + elif [ "$count" != "$rm_subflow_nr" ]; then + fail_test "got $count RM_SUBFLOW[s] expected $rm_subflow_nr" + else + print_ok + fi + + print_info "$extra_msg" +} + +chk_rm_tx_nr() +{ + local rm_addr_tx_nr=$1 + + print_check "rm TX" + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtRmAddrTx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$rm_addr_tx_nr" ]; then + fail_test "got $count RM_ADDR[s] expected $rm_addr_tx_nr" + else + print_ok + fi +} + +chk_prio_nr() +{ + local mp_prio_nr_tx=$1 + local mp_prio_nr_rx=$2 + local count + + print_check "ptx" + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPPrioTx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$mp_prio_nr_tx" ]; then + fail_test "got $count MP_PRIO[s] TX expected $mp_prio_nr_tx" + else + print_ok + fi + + print_check "prx" + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPPrioRx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$mp_prio_nr_rx" ]; then + fail_test "got $count MP_PRIO[s] RX expected $mp_prio_nr_rx" else - echo -n "[ ok ]" + print_ok fi +} - echo -n " - ack" - count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinAckRx | awk '{print $2}'` - [ -z "$count" ] && count=0 - if [ "$count" != "$ack_nr" ]; then - echo "[fail] got $count JOIN[s] ack expected $ack_nr" - ret=1 +chk_subflow_nr() +{ + local msg="$1" + local subflow_nr=$2 + local cnt1 + local cnt2 + local dump_stats + + print_check "${msg}" + + cnt1=$(ss -N $ns1 -tOni | grep -c token) + cnt2=$(ss -N $ns2 -tOni | grep -c token) + if [ "$cnt1" != "$subflow_nr" ] || [ "$cnt2" != "$subflow_nr" ]; then + fail_test "got $cnt1:$cnt2 
subflows expected $subflow_nr" dump_stats=1 else - echo "[ ok ]" + print_ok fi + if [ "${dump_stats}" = 1 ]; then - echo Server ns stats - ip netns exec $ns1 nstat -as | grep MPTcp - echo Client ns stats - ip netns exec $ns2 nstat -as | grep MPTcp - fi -} - -sin=$(mktemp) -sout=$(mktemp) -cin=$(mktemp) -cout=$(mktemp) -init -make_file "$cin" "client" -make_file "$sin" "server" -trap cleanup EXIT - -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "no JOIN" "0" "0" "0" - -# subflow limted by client -reset -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "single subflow, limited by client" 0 0 0 - -# subflow limted by server -reset -ip netns exec $ns2 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "single subflow, limited by server" 1 1 0 - -# subflow -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "single subflow" 1 1 1 - -# multiple subflows -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 2 -ip netns exec $ns2 ./pm_nl_ctl limits 0 2 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "multiple subflows" 2 2 2 - -# multiple subflows limited by serverf -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl limits 0 2 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "multiple subflows, limited by server" 2 2 1 - -# add_address, unused -reset -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "unused signal address" 0 0 0 - -# accept and use add_addr -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns 
exec $ns2 ./pm_nl_ctl limits 1 1 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "signal address" 1 1 1 - -# accept and use add_addr with an additional subflow -# note: signal address in server ns and local addresses in client ns must -# belong to different subnets or one of the listed local address could be -# used for 'add_addr' subflow -reset -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal -ip netns exec $ns1 ./pm_nl_ctl limits 0 2 -ip netns exec $ns2 ./pm_nl_ctl limits 1 2 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "subflow and signal" 2 2 2 - -# accept and use add_addr with additional subflows -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 3 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal -ip netns exec $ns2 ./pm_nl_ctl limits 1 3 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "multiple subflows and signal" 3 3 3 + ss -N $ns1 -tOni + ss -N $ns1 -tOni | grep token + ip -n $ns1 mptcp endpoint + fi +} + +chk_mptcp_info() +{ + local info1=$1 + local exp1=$2 + local info2=$3 + local exp2=$4 + local cnt1 + local cnt2 + local dump_stats + + print_check "mptcp_info ${info1:0:15}=$exp1:$exp2" + + cnt1=$(ss -N $ns1 -inmHM | mptcp_lib_get_info_value "$info1" "$info1") + cnt2=$(ss -N $ns2 -inmHM | mptcp_lib_get_info_value "$info2" "$info2") + # 'ss' only display active connections and counters that are not 0. + [ -z "$cnt1" ] && cnt1=0 + [ -z "$cnt2" ] && cnt2=0 + + if [ "$cnt1" != "$exp1" ] || [ "$cnt2" != "$exp2" ]; then + fail_test "got $cnt1:$cnt2 $info1:$info2 expected $exp1:$exp2" + dump_stats=1 + else + print_ok + fi + + if [ "$dump_stats" = 1 ]; then + ss -N $ns1 -inmHM + ss -N $ns2 -inmHM + fi +} + +# $1: subflows in ns1 ; $2: subflows in ns2 +# number of all subflows, including the initial subflow. 
+chk_subflows_total() +{ + local cnt1 + local cnt2 + local info="subflows_total" + local dump_stats + + # if subflows_total counter is supported, use it: + if [ -n "$(ss -N $ns1 -inmHM | mptcp_lib_get_info_value $info $info)" ]; then + chk_mptcp_info $info $1 $info $2 + return + fi + + print_check "$info $1:$2" + + # if not, count the TCP connections that are in fact MPTCP subflows + cnt1=$(ss -N $ns1 -ti state established state syn-sent state syn-recv | + grep -c tcp-ulp-mptcp) + cnt2=$(ss -N $ns2 -ti state established state syn-sent state syn-recv | + grep -c tcp-ulp-mptcp) + + if [ "$1" != "$cnt1" ] || [ "$2" != "$cnt2" ]; then + fail_test "got subflows $cnt1:$cnt2 expected $1:$2" + dump_stats=1 + else + print_ok + fi + + if [ "$dump_stats" = 1 ]; then + ss -N $ns1 -ti + ss -N $ns2 -ti + fi +} + +chk_link_usage() +{ + local ns=$1 + local link=$2 + local out=$3 + local expected_rate=$4 + + local tx_link tx_total + tx_link=$(ip netns exec $ns cat /sys/class/net/$link/statistics/tx_bytes) + tx_total=$(stat --format=%s $out) + local tx_rate=$((tx_link * 100 / tx_total)) + local tolerance=5 + + print_check "link usage" + if [ $tx_rate -lt $((expected_rate - tolerance)) ] || \ + [ $tx_rate -gt $((expected_rate + tolerance)) ]; then + fail_test "got $tx_rate% usage, expected $expected_rate%" + else + print_ok + fi +} + +wait_attempt_fail() +{ + local timeout_ms=$((timeout_poll * 1000)) + local time=0 + local ns=$1 + + while [ $time -lt $timeout_ms ]; do + local cnt + + cnt=$(mptcp_lib_get_counter ${ns} "TcpAttemptFails") + + [ "$cnt" = 1 ] && return 1 + time=$((time + 100)) + sleep 0.1 + done + return 1 +} + +set_userspace_pm() +{ + local ns=$1 + + ip netns exec $ns sysctl -q net.mptcp.pm_type=1 +} + +subflows_tests() +{ + if reset "no JOIN"; then + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + fi + + # subflow limited by client + if reset "single subflow, limited by client"; then + pm_nl_set_limits $ns1 0 0 + pm_nl_set_limits $ns2 0 0 + pm_nl_add_endpoint $ns2 
10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + fi + + # subflow limited by server + if reset "single subflow, limited by server"; then + pm_nl_set_limits $ns1 0 0 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 0 + fi + + # subflow + if reset "single subflow"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + fi + + # multiple subflows + if reset "multiple subflows"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + fi + + # multiple subflows limited by server + if reset "multiple subflows, limited by server"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 2 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 1 + fi + + # single subflow, dev + if reset "single subflow, dev"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow dev ns2eth3 + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + fi +} + +subflows_error_tests() +{ + # If a single subflow is configured, and matches the MPC src + # address, no additional subflow should be created + if reset "no MPC reuse with single endpoint"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow + speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + fi + + # multiple subflows, with subflow creation error + if reset_with_tcp_filter "multi subflows, with failing subflow" ns1 10.0.3.2 REJECT && + continue_if mptcp_lib_kallsyms_has "mptcp_pm_subflow_check_next$"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow + speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + fi + + # multiple subflows, with subflow timeout on MPJ + if reset_with_tcp_filter "multi subflows, with subflow timeout" ns1 10.0.3.2 DROP && + continue_if mptcp_lib_kallsyms_has "mptcp_pm_subflow_check_next$"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow + speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + fi + + # multiple subflows, check that the endpoint corresponding to + # closed subflow (due to reset) is not reused if additional + # subflows are added later + if reset_with_tcp_filter "multi subflows, fair usage on close" ns1 10.0.3.2 REJECT && + continue_if mptcp_lib_kallsyms_has "mptcp_pm_subflow_check_next$"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 & + + # mpj subflow will be in TW after the reset + wait_attempt_fail $ns2 + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow + wait + + # additional subflow could be created only if the PM selects + # the later endpoint, skipping the already used one + chk_join_nr 1 1 1 + fi +} + +signal_address_tests() +{ + # add_address, unused + if reset "unused signal address"; then + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + chk_add_tx_nr 1 1 + chk_add_nr 1 1 + fi + + # accept and use add_addr + if reset "signal address"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + fi + + # accept and use add_addr with an additional subflow + # note: signal address in server ns and local addresses in client ns must + # belong to different subnets or 
one of the listed local address could be + # used for 'add_addr' subflow + if reset "subflow and signal"; then + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 1 2 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + chk_add_nr 1 1 + fi + + # accept and use add_addr with additional subflows + if reset "multiple subflows and signal"; then + pm_nl_set_limits $ns1 0 3 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_set_limits $ns2 1 3 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 3 3 3 + chk_add_nr 1 1 + fi + + # signal addresses + if reset "signal addresses"; then + pm_nl_set_limits $ns1 3 3 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_add_endpoint $ns1 10.0.4.1 flags signal + pm_nl_set_limits $ns2 3 3 + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 3 3 3 + chk_add_nr 3 3 + fi + + # signal invalid addresses + if reset "signal invalid addresses"; then + pm_nl_set_limits $ns1 3 3 + pm_nl_add_endpoint $ns1 10.0.12.1 flags signal + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_add_endpoint $ns1 10.0.14.1 flags signal + pm_nl_set_limits $ns2 3 3 + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 3 3 + fi + + # signal addresses race test + if reset "signal addresses race test"; then + pm_nl_set_limits $ns1 4 4 + pm_nl_set_limits $ns2 4 4 + pm_nl_add_endpoint $ns1 10.0.1.1 flags signal + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_add_endpoint $ns1 10.0.4.1 flags signal + pm_nl_add_endpoint $ns2 10.0.1.2 flags signal + pm_nl_add_endpoint $ns2 10.0.2.2 flags signal + pm_nl_add_endpoint $ns2 10.0.3.2 flags signal + pm_nl_add_endpoint $ns2 10.0.4.2 flags signal + + # the peer could possibly miss some addr notification, allow retransmission + 
ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1 + speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + + # It is not directly linked to the commit introducing this + # symbol but for the parent one which is linked anyway. + if ! mptcp_lib_kallsyms_has "mptcp_pm_subflow_check_next$"; then + chk_join_nr 3 3 2 + chk_add_nr 4 4 + else + chk_join_nr 3 3 3 + # the server will not signal the address terminating + # the MPC subflow + chk_add_nr 3 3 + fi + fi +} + +link_failure_tests() +{ + # accept and use add_addr with additional subflows and link loss + if reset "multiple flows, signal, link failure"; then + # without any b/w limit each veth could spool the packets and get + # them acked at xmit time, so that the corresponding subflow will + # have almost always no outstanding pkts, the scheduler will pick + # always the first subflow and we will have hard time testing + # active backup and link switch-over. + # Let's set some arbitrary (low) virtual link limits. + init_shapers + pm_nl_set_limits $ns1 0 3 + pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal + pm_nl_set_limits $ns2 1 3 + pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow + pm_nl_add_endpoint $ns2 10.0.4.2 dev ns2eth4 flags subflow + test_linkfail=1 \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 3 3 3 + chk_add_nr 1 1 + chk_stale_nr $ns2 1 5 1 + fi + + # accept and use add_addr with additional subflows and link loss + # for bidirectional transfer + if reset "multi flows, signal, bidi, link fail"; then + init_shapers + pm_nl_set_limits $ns1 0 3 + pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal + pm_nl_set_limits $ns2 1 3 + pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow + pm_nl_add_endpoint $ns2 10.0.4.2 dev ns2eth4 flags subflow + test_linkfail=2 \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 3 3 3 + chk_add_nr 1 1 + chk_stale_nr $ns2 1 -1 1 + fi + + # 2 subflows plus 1 backup subflow with a lossy link, backup + # will never be used + if reset "backup subflow unused, 
link failure"; then + init_shapers + pm_nl_set_limits $ns1 0 2 + pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal + pm_nl_set_limits $ns2 1 2 + pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup + FAILING_LINKS="1" test_linkfail=1 \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + chk_add_nr 1 1 + chk_link_usage $ns2 ns2eth3 $cinsent 0 + fi + + # 2 lossy links after half transfer, backup will get half of + # the traffic + if reset "backup flow used, multi links fail"; then + init_shapers + pm_nl_set_limits $ns1 0 2 + pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal + pm_nl_set_limits $ns2 1 2 + pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup + FAILING_LINKS="1 2" test_linkfail=1 \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + chk_add_nr 1 1 + chk_stale_nr $ns2 2 4 2 + chk_link_usage $ns2 ns2eth3 $cinsent 50 + fi + + # use a backup subflow with the first subflow on a lossy link + # for bidirectional transfer + if reset "backup flow used, bidi, link failure"; then + init_shapers + pm_nl_set_limits $ns1 0 2 + pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal + pm_nl_set_limits $ns2 1 3 + pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup + FAILING_LINKS="1 2" test_linkfail=2 \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + chk_add_nr 1 1 + chk_stale_nr $ns2 1 -1 2 + chk_link_usage $ns2 ns2eth3 $cinsent 50 + fi +} + +add_addr_timeout_tests() +{ + # add_addr timeout + if reset_with_add_addr_timeout "signal address, ADD_ADDR timeout"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_tx_nr 4 4 + chk_add_nr 4 0 + fi + + # add_addr timeout IPv6 + if reset_with_add_addr_timeout "signal address, ADD_ADDR6 timeout" 6; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal + speed=slow \ + 
run_tests $ns1 $ns2 dead:beef:1::1 + chk_join_nr 1 1 1 + chk_add_nr 4 0 + fi + + # signal addresses timeout + if reset_with_add_addr_timeout "signal addresses, ADD_ADDR timeout"; then + pm_nl_set_limits $ns1 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_set_limits $ns2 2 2 + speed=10 \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + chk_add_nr 8 0 + fi + + # signal invalid addresses timeout + if reset_with_add_addr_timeout "invalid address, ADD_ADDR timeout"; then + pm_nl_set_limits $ns1 2 2 + pm_nl_add_endpoint $ns1 10.0.12.1 flags signal + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_set_limits $ns2 2 2 + speed=10 \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 8 0 + fi +} + +remove_tests() +{ + # single subflow, remove + if reset "remove single subflow"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + addr_nr_ns2=-1 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_rm_tx_nr 1 + chk_rm_nr 1 1 + chk_rst_nr 0 0 + fi + + # multiple subflows, remove + if reset "remove multiple subflows"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + addr_nr_ns2=-2 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + chk_rm_nr 2 2 + chk_rst_nr 0 0 + fi + + # single address, remove + if reset "remove single address"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_set_limits $ns2 1 1 + addr_nr_ns1=-1 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + chk_rm_nr 1 1 invert + chk_rst_nr 0 0 + fi + + # subflow and signal, remove + if reset "remove subflow and signal"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_set_limits $ns2 1 2 + pm_nl_add_endpoint $ns2 10.0.3.2 flags 
subflow + addr_nr_ns1=-1 addr_nr_ns2=-1 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + chk_add_nr 1 1 + chk_rm_nr 1 1 + chk_rst_nr 0 0 + fi + + # subflows and signal, remove + if reset "remove subflows and signal"; then + pm_nl_set_limits $ns1 0 3 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_set_limits $ns2 1 3 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow + addr_nr_ns1=-1 addr_nr_ns2=-2 speed=10 \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 3 3 3 + chk_add_nr 1 1 + chk_rm_nr 2 2 + chk_rst_nr 0 0 + fi + + # addresses remove + if reset "remove addresses"; then + pm_nl_set_limits $ns1 3 3 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal id 250 + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_add_endpoint $ns1 10.0.4.1 flags signal + pm_nl_set_limits $ns2 3 3 + addr_nr_ns1=-3 speed=10 \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 3 3 3 + chk_add_nr 3 3 + chk_rm_nr 3 3 invert + chk_rst_nr 0 0 + fi + + # invalid addresses remove + if reset "remove invalid addresses"; then + pm_nl_set_limits $ns1 3 3 + pm_nl_add_endpoint $ns1 10.0.12.1 flags signal + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_add_endpoint $ns1 10.0.14.1 flags signal + pm_nl_set_limits $ns2 3 3 + addr_nr_ns1=-3 speed=10 \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 3 3 + chk_rm_nr 3 1 invert + chk_rst_nr 0 0 + fi + + # subflows and signal, flush + if reset "flush subflows and signal"; then + pm_nl_set_limits $ns1 0 3 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_set_limits $ns2 1 3 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow + addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 3 3 3 + chk_add_nr 1 1 + chk_rm_nr 1 3 invert simult + chk_rst_nr 0 0 + fi + + # subflows flush + if reset "flush subflows"; then + pm_nl_set_limits $ns1 3 3 + pm_nl_set_limits $ns2 3 3 + pm_nl_add_endpoint $ns2 
10.0.2.2 flags subflow id 150 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow + addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 3 3 3 + + if mptcp_lib_kversion_ge 5.18; then + chk_rm_tx_nr 0 + chk_rm_nr 0 3 simult + else + chk_rm_nr 3 3 + fi + chk_rst_nr 0 0 + fi + + # addresses flush + if reset "flush addresses"; then + pm_nl_set_limits $ns1 3 3 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal id 250 + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_add_endpoint $ns1 10.0.4.1 flags signal + pm_nl_set_limits $ns2 3 3 + addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 3 3 3 + chk_add_nr 3 3 + chk_rm_nr 3 3 invert simult + chk_rst_nr 0 0 + fi + + # invalid addresses flush + if reset "flush invalid addresses"; then + pm_nl_set_limits $ns1 3 3 + pm_nl_add_endpoint $ns1 10.0.12.1 flags signal + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_add_endpoint $ns1 10.0.14.1 flags signal + pm_nl_set_limits $ns2 3 3 + addr_nr_ns1=-8 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 3 3 + chk_rm_nr 3 1 invert + chk_rst_nr 0 0 + fi + + # remove id 0 subflow + if reset "remove id 0 subflow"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + addr_nr_ns2=-9 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_rm_nr 1 1 + chk_rst_nr 0 0 + fi + + # remove id 0 address + if reset "remove id 0 address"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_set_limits $ns2 1 1 + addr_nr_ns1=-9 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + chk_rm_nr 1 1 invert + chk_rst_nr 0 0 invert + fi +} + +add_tests() +{ + # add single subflow + if reset "add single subflow"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + addr_nr_ns2=1 speed=slow cestab_ns2=1 \ + 
run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_cestab_nr $ns2 0 + fi + + # add signal address + if reset "add signal address"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 1 1 + addr_nr_ns1=1 speed=slow cestab_ns1=1 \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + chk_cestab_nr $ns1 0 + fi + + # add multiple subflows + if reset "add multiple subflows"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 + addr_nr_ns2=2 speed=slow cestab_ns2=1 \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + chk_cestab_nr $ns2 0 + fi + + # add multiple subflows IPv6 + if reset "add multiple subflows IPv6"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 + addr_nr_ns2=2 speed=slow cestab_ns2=1 \ + run_tests $ns1 $ns2 dead:beef:1::1 + chk_join_nr 2 2 2 + chk_cestab_nr $ns2 0 + fi + + # add multiple addresses IPv6 + if reset "add multiple addresses IPv6"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 + addr_nr_ns1=2 speed=slow cestab_ns1=1 \ + run_tests $ns1 $ns2 dead:beef:1::1 + chk_join_nr 2 2 2 + chk_add_nr 2 2 + chk_cestab_nr $ns1 0 + fi +} + +ipv6_tests() +{ + # subflow IPv6 + if reset "single subflow IPv6"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 dead:beef:3::2 dev ns2eth3 flags subflow + speed=slow \ + run_tests $ns1 $ns2 dead:beef:1::1 + chk_join_nr 1 1 1 + fi + + # add_address, unused IPv6 + if reset "unused signal address IPv6"; then + pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal + speed=slow \ + run_tests $ns1 $ns2 dead:beef:1::1 + chk_join_nr 0 0 0 + chk_add_nr 1 1 + fi + + # signal address IPv6 + if reset "single address IPv6"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal + pm_nl_set_limits $ns2 1 1 + speed=slow \ + run_tests $ns1 $ns2 dead:beef:1::1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + fi + + # single address IPv6, remove + if reset "remove single address IPv6"; then + pm_nl_set_limits $ns1 
0 1 + pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal + pm_nl_set_limits $ns2 1 1 + addr_nr_ns1=-1 speed=slow \ + run_tests $ns1 $ns2 dead:beef:1::1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + chk_rm_nr 1 1 invert + fi + + # subflow and signal IPv6, remove + if reset "remove subflow and signal IPv6"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal + pm_nl_set_limits $ns2 1 2 + pm_nl_add_endpoint $ns2 dead:beef:3::2 dev ns2eth3 flags subflow + addr_nr_ns1=-1 addr_nr_ns2=-1 speed=slow \ + run_tests $ns1 $ns2 dead:beef:1::1 + chk_join_nr 2 2 2 + chk_add_nr 1 1 + chk_rm_nr 1 1 + fi +} + +v4mapped_tests() +{ + # subflow IPv4-mapped to IPv4-mapped + if reset "single subflow IPv4-mapped"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 "::ffff:10.0.3.2" flags subflow + run_tests $ns1 $ns2 "::ffff:10.0.1.1" + chk_join_nr 1 1 1 + fi + + # signal address IPv4-mapped with IPv4-mapped sk + if reset "signal address IPv4-mapped"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 "::ffff:10.0.2.1" flags signal + run_tests $ns1 $ns2 "::ffff:10.0.1.1" + chk_join_nr 1 1 1 + chk_add_nr 1 1 + fi + + # subflow v4-map-v6 + if reset "single subflow v4-map-v6"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + run_tests $ns1 $ns2 "::ffff:10.0.1.1" + chk_join_nr 1 1 1 + fi + + # signal address v4-map-v6 + if reset "signal address v4-map-v6"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + run_tests $ns1 $ns2 "::ffff:10.0.1.1" + chk_join_nr 1 1 1 + chk_add_nr 1 1 + fi + + # subflow v6-map-v4 + if reset "single subflow v6-map-v4"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 "::ffff:10.0.3.2" flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + fi + + # signal address v6-map-v4 + if reset "signal 
address v6-map-v4"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 "::ffff:10.0.2.1" flags signal + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + fi + + # no subflow IPv6 to v4 address + if reset "no JOIN with diff families v4-v6"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 dead:beef:2::2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + fi + + # no subflow IPv6 to v4 address even if v6 has a valid v4 at the end + if reset "no JOIN with diff families v4-v6-2"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 dead:beef:2::10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + fi + + # no subflow IPv4 to v6 address, no need to slow down too then + if reset "no JOIN with diff families v6-v4"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + run_tests $ns1 $ns2 dead:beef:1::1 + chk_join_nr 0 0 0 + fi +} + +mixed_tests() +{ + if reset "IPv4 sockets do not use IPv6 addresses" && + continue_if mptcp_lib_kversion_ge 6.3; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal + speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + fi + + # Need an IPv6 mptcp socket to allow subflows of both families + if reset "simult IPv4 and IPv6 subflows" && + continue_if mptcp_lib_kversion_ge 6.3; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 10.0.1.1 flags signal + speed=slow \ + run_tests $ns1 $ns2 dead:beef:2::1 + chk_join_nr 1 1 1 + fi + + # cross families subflows will not be created even in fullmesh mode + if reset "simult IPv4 and IPv6 subflows, fullmesh 1x1" && + continue_if mptcp_lib_kversion_ge 6.3; then + pm_nl_set_limits $ns1 0 4 + pm_nl_set_limits $ns2 1 4 + pm_nl_add_endpoint $ns2 dead:beef:2::2 flags subflow,fullmesh + 
pm_nl_add_endpoint $ns1 10.0.1.1 flags signal + speed=slow \ + run_tests $ns1 $ns2 dead:beef:2::1 + chk_join_nr 1 1 1 + fi + + # fullmesh still tries to create all the possible subflows with + # matching family + if reset "simult IPv4 and IPv6 subflows, fullmesh 2x2" && + continue_if mptcp_lib_kversion_ge 6.3; then + pm_nl_set_limits $ns1 0 4 + pm_nl_set_limits $ns2 2 4 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal + fullmesh=1 speed=slow \ + run_tests $ns1 $ns2 dead:beef:1::1 + chk_join_nr 4 4 4 + fi +} + +backup_tests() +{ + # single subflow, backup + if reset "single subflow, backup" && + continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup + sflags=nobackup speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_prio_nr 0 1 + fi + + # single address, backup + if reset "single address, backup" && + continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_set_limits $ns2 1 1 + sflags=backup speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + chk_prio_nr 1 1 + fi + + # single address with port, backup + if reset "single address with port, backup" && + continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 + pm_nl_set_limits $ns2 1 1 + sflags=backup speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + chk_prio_nr 1 1 + fi + + if reset "mpc backup" && + continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then + pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup + speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + chk_prio_nr 0 1 + fi + + if reset "mpc backup both sides" && + continue_if 
mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then + pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow,backup + pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup + speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + chk_prio_nr 1 1 + fi + + if reset "mpc switch to backup" && + continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then + pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow + sflags=backup speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + chk_prio_nr 0 1 + fi + + if reset "mpc switch to backup both sides" && + continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then + pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow + pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow + sflags=backup speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + chk_prio_nr 1 1 + fi +} + +verify_listener_events() +{ + local e_type=$2 + local e_saddr=$4 + local e_sport=$5 + local name + + if [ $e_type = $MPTCP_LIB_EVENT_LISTENER_CREATED ]; then + name="LISTENER_CREATED" + elif [ $e_type = $MPTCP_LIB_EVENT_LISTENER_CLOSED ]; then + name="LISTENER_CLOSED " + else + name="$e_type" + fi + + print_check "$name $e_saddr:$e_sport" + + if ! 
mptcp_lib_kallsyms_has "mptcp_event_pm_listener$"; then + print_skip "event not supported" + return + fi + + if mptcp_lib_verify_listener_events "${@}"; then + print_ok + return 0 + fi + fail_test +} + +add_addr_ports_tests() +{ + # signal address with port + if reset "signal address with port"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 1 + fi + + # subflow and signal with port + if reset "subflow and signal with port"; then + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 1 2 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + chk_add_nr 1 1 1 + fi + + # single address with port, remove + # pm listener events + if reset_with_events "remove single address with port"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 + pm_nl_set_limits $ns2 1 1 + addr_nr_ns1=-1 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 1 + chk_rm_nr 1 1 invert + + verify_listener_events $evts_ns1 $MPTCP_LIB_EVENT_LISTENER_CREATED \ + $MPTCP_LIB_AF_INET 10.0.2.1 10100 + verify_listener_events $evts_ns1 $MPTCP_LIB_EVENT_LISTENER_CLOSED \ + $MPTCP_LIB_AF_INET 10.0.2.1 10100 + kill_events_pids + fi + + # subflow and signal with port, remove + if reset "remove subflow and signal with port"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 + pm_nl_set_limits $ns2 1 2 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + addr_nr_ns1=-1 addr_nr_ns2=-1 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + chk_add_nr 1 1 1 + chk_rm_nr 1 1 + fi + + # subflows and signal with port, flush + if reset "flush subflows and signal with port"; then + pm_nl_set_limits $ns1 0 3 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 
+ pm_nl_set_limits $ns2 1 3 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow + addr_nr_ns1=-8 addr_nr_ns2=-2 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 3 3 3 + chk_add_nr 1 1 + chk_rm_nr 1 3 invert simult + fi + + # multiple addresses with port + if reset "multiple addresses with port"; then + pm_nl_set_limits $ns1 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal port 10100 + pm_nl_set_limits $ns2 2 2 + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + chk_add_nr 2 2 2 + fi + + # multiple addresses with ports + if reset "multiple addresses with ports"; then + pm_nl_set_limits $ns1 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal port 10101 + pm_nl_set_limits $ns2 2 2 + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + chk_add_nr 2 2 2 + fi +} + +syncookies_tests() +{ + # single subflow, syncookies + if reset_with_cookies "single subflow with syn cookies"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + fi + + # multiple subflows with syn cookies + if reset_with_cookies "multiple subflows with syn cookies"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + fi + + # multiple subflows limited by server + if reset_with_cookies "subflows limited by server w cookies"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 2 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 1 1 + fi + + # test signal address with cookies + if reset_with_cookies "signal address with syn cookies"; then + pm_nl_set_limits $ns1 0 1 + 
pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + fi + + # test cookie with subflow and signal + if reset_with_cookies "subflow and signal w cookies"; then + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 1 2 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + chk_add_nr 1 1 + fi + + # accept and use add_addr with additional subflows + if reset_with_cookies "subflows and signal w. cookies"; then + pm_nl_set_limits $ns1 0 3 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_set_limits $ns2 1 3 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 3 3 3 + chk_add_nr 1 1 + fi +} + +checksum_tests() +{ + # checksum test 0 0 + if reset_with_checksum 0 0; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + fi + + # checksum test 1 1 + if reset_with_checksum 1 1; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + fi + + # checksum test 0 1 + if reset_with_checksum 0 1; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + fi + + # checksum test 1 0 + if reset_with_checksum 1 0; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + fi +} + +deny_join_id0_tests() +{ + # subflow allow join id0 ns1 + if reset_with_allow_join_id0 "single subflow allow join id0 ns1" 1 0; then + pm_nl_set_limits $ns1 1 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + fi + + # subflow allow join id0 ns2 + if reset_with_allow_join_id0 "single subflow allow join id0 ns2" 0 1; then + 
pm_nl_set_limits $ns1 1 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + fi + + # signal address allow join id0 ns1 + # ADD_ADDRs are not affected by allow_join_id0 value. + if reset_with_allow_join_id0 "signal address allow join id0 ns1" 1 0; then + pm_nl_set_limits $ns1 1 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + fi + + # signal address allow join id0 ns2 + # ADD_ADDRs are not affected by allow_join_id0 value. + if reset_with_allow_join_id0 "signal address allow join id0 ns2" 0 1; then + pm_nl_set_limits $ns1 1 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + fi + + # subflow and address allow join id0 ns1 + if reset_with_allow_join_id0 "subflow and address allow join id0 1" 1 0; then + pm_nl_set_limits $ns1 2 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + fi + + # subflow and address allow join id0 ns2 + if reset_with_allow_join_id0 "subflow and address allow join id0 2" 0 1; then + pm_nl_set_limits $ns1 2 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + fi +} + +fullmesh_tests() +{ + # fullmesh 1 + # 2 fullmesh addrs in ns2, added before the connection, + # 1 non-fullmesh addr in ns1, added during the connection. 
+ if reset "fullmesh test 2x1"; then + pm_nl_set_limits $ns1 0 4 + pm_nl_set_limits $ns2 1 4 + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,fullmesh + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,fullmesh + addr_nr_ns1=1 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 4 4 4 + chk_add_nr 1 1 + fi + + # fullmesh 2 + # 1 non-fullmesh addr in ns1, added before the connection, + # 1 fullmesh addr in ns2, added during the connection. + if reset "fullmesh test 1x1"; then + pm_nl_set_limits $ns1 1 3 + pm_nl_set_limits $ns2 1 3 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + fullmesh=1 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 3 3 3 + chk_add_nr 1 1 + fi + + # fullmesh 3 + # 1 non-fullmesh addr in ns1, added before the connection, + # 2 fullmesh addrs in ns2, added during the connection. + if reset "fullmesh test 1x2"; then + pm_nl_set_limits $ns1 2 5 + pm_nl_set_limits $ns2 1 5 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + fullmesh=2 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 5 5 5 + chk_add_nr 1 1 + fi + + # fullmesh 4 + # 1 non-fullmesh addr in ns1, added before the connection, + # 2 fullmesh addrs in ns2, added during the connection, + # limit max_subflows to 4. 
+ if reset "fullmesh test 1x2, limited"; then + pm_nl_set_limits $ns1 2 4 + pm_nl_set_limits $ns2 1 4 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + fullmesh=2 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 4 4 4 + chk_add_nr 1 1 + fi + + # set fullmesh flag + if reset "set fullmesh flag test" && + continue_if mptcp_lib_kversion_ge 5.18; then + pm_nl_set_limits $ns1 4 4 + pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow + pm_nl_set_limits $ns2 4 4 + addr_nr_ns2=1 sflags=fullmesh speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + chk_rm_nr 0 1 + fi + + # set nofullmesh flag + if reset "set nofullmesh flag test" && + continue_if mptcp_lib_kversion_ge 5.18; then + pm_nl_set_limits $ns1 4 4 + pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow,fullmesh + pm_nl_set_limits $ns2 4 4 + fullmesh=1 sflags=nofullmesh speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + chk_rm_nr 0 1 + fi + + # set backup,fullmesh flags + if reset "set backup,fullmesh flags test" && + continue_if mptcp_lib_kversion_ge 5.18; then + pm_nl_set_limits $ns1 4 4 + pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow + pm_nl_set_limits $ns2 4 4 + addr_nr_ns2=1 sflags=backup,fullmesh speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + chk_prio_nr 0 1 + chk_rm_nr 0 1 + fi + + # set nobackup,nofullmesh flags + if reset "set nobackup,nofullmesh flags test" && + continue_if mptcp_lib_kversion_ge 5.18; then + pm_nl_set_limits $ns1 4 4 + pm_nl_set_limits $ns2 4 4 + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,backup,fullmesh + sflags=nobackup,nofullmesh speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + chk_prio_nr 0 1 + chk_rm_nr 0 1 + fi +} + +fastclose_tests() +{ + if reset_check_counter "fastclose test" "MPTcpExtMPFastcloseTx"; then + MPTCP_LIB_SUBTEST_FLAKY=1 + test_linkfail=1024 fastclose=client \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + chk_fclose_nr 1 1 + chk_rst_nr 1 1 invert + fi + + if reset_check_counter "fastclose server 
test" "MPTcpExtMPFastcloseRx"; then + MPTCP_LIB_SUBTEST_FLAKY=1 + test_linkfail=1024 fastclose=server \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 0 0 0 1 + chk_fclose_nr 1 1 invert + chk_rst_nr 1 1 + fi +} + +pedit_action_pkts() +{ + tc -n $ns2 -j -s action show action pedit index 100 | \ + mptcp_lib_get_info_value \"packets\" packets +} + +fail_tests() +{ + # single subflow + if reset_with_fail "Infinite map" 1; then + MPTCP_LIB_SUBTEST_FLAKY=1 + test_linkfail=128 \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 +1 +0 1 0 1 "$(pedit_action_pkts)" + chk_fail_nr 1 -1 invert + fi + + # multiple subflows + if reset_with_fail "MP_FAIL MP_RST" 2; then + MPTCP_LIB_SUBTEST_FLAKY=1 + tc -n $ns2 qdisc add dev ns2eth1 root netem rate 1mbit delay 5ms + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow + test_linkfail=1024 \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 1 0 1 1 0 "$(pedit_action_pkts)" + fi +} + +# $1: ns ; $2: addr ; $3: id +userspace_pm_add_addr() +{ + local evts=$evts_ns1 + local tk + + [ "$1" == "$ns2" ] && evts=$evts_ns2 + tk=$(mptcp_lib_evts_get_info token "$evts") + + ip netns exec $1 ./pm_nl_ctl ann $2 token $tk id $3 + sleep 1 +} + +# $1: ns ; $2: id +userspace_pm_rm_addr() +{ + local evts=$evts_ns1 + local tk + local cnt + + [ "$1" == "$ns2" ] && evts=$evts_ns2 + tk=$(mptcp_lib_evts_get_info token "$evts") + + cnt=$(rm_addr_count ${1}) + ip netns exec $1 ./pm_nl_ctl rem token $tk id $2 + wait_rm_addr $1 "${cnt}" +} + +# $1: ns ; $2: addr ; $3: id +userspace_pm_add_sf() +{ + local evts=$evts_ns1 + local tk da dp + + [ "$1" == "$ns2" ] && evts=$evts_ns2 + tk=$(mptcp_lib_evts_get_info token "$evts") + da=$(mptcp_lib_evts_get_info daddr4 "$evts") + dp=$(mptcp_lib_evts_get_info dport "$evts") + + ip netns exec $1 ./pm_nl_ctl csf lip $2 lid $3 \ + rip $da rport $dp token $tk + sleep 1 +} + +# $1: ns ; $2: addr $3: event type +userspace_pm_rm_sf() +{ + local evts=$evts_ns1 + 
local t=${3:-1} + local ip + local tk da dp sp + local cnt + + [ "$1" == "$ns2" ] && evts=$evts_ns2 + [ -n "$(mptcp_lib_evts_get_info "saddr4" "$evts" $t)" ] && ip=4 + [ -n "$(mptcp_lib_evts_get_info "saddr6" "$evts" $t)" ] && ip=6 + tk=$(mptcp_lib_evts_get_info token "$evts") + da=$(mptcp_lib_evts_get_info "daddr$ip" "$evts" $t $2) + dp=$(mptcp_lib_evts_get_info dport "$evts" $t $2) + sp=$(mptcp_lib_evts_get_info sport "$evts" $t $2) + + cnt=$(rm_sf_count ${1}) + ip netns exec $1 ./pm_nl_ctl dsf lip $2 lport $sp \ + rip $da rport $dp token $tk + wait_rm_sf $1 "${cnt}" +} + +check_output() +{ + local cmd="$1" + local expected="$2" + local msg="$3" + local rc=0 + + mptcp_lib_check_output "${err}" "${cmd}" "${expected}" || rc=${?} + if [ ${rc} -eq 2 ]; then + fail_test "fail to check output # error ${rc}" + elif [ ${rc} -eq 0 ]; then + print_ok + elif [ ${rc} -eq 1 ]; then + fail_test "fail to check output # different output" + fi +} + +# $1: ns +userspace_pm_dump() +{ + local evts=$evts_ns1 + local tk + + [ "$1" == "$ns2" ] && evts=$evts_ns2 + tk=$(mptcp_lib_evts_get_info token "$evts") + + ip netns exec $1 ./pm_nl_ctl dump token $tk +} + +# $1: ns ; $2: id +userspace_pm_get_addr() +{ + local evts=$evts_ns1 + local tk + + [ "$1" == "$ns2" ] && evts=$evts_ns2 + tk=$(mptcp_lib_evts_get_info token "$evts") + + ip netns exec $1 ./pm_nl_ctl get $2 token $tk +} + +userspace_pm_chk_dump_addr() +{ + local ns="${1}" + local exp="${2}" + local check="${3}" + + print_check "dump addrs ${check}" + + if mptcp_lib_kallsyms_has "mptcp_userspace_pm_dump_addr$"; then + check_output "userspace_pm_dump ${ns}" "${exp}" + else + print_skip + fi +} + +userspace_pm_chk_get_addr() +{ + local ns="${1}" + local id="${2}" + local exp="${3}" + + print_check "get id ${id} addr" + + if mptcp_lib_kallsyms_has "mptcp_userspace_pm_get_addr$"; then + check_output "userspace_pm_get_addr ${ns} ${id}" "${exp}" + else + print_skip + fi +} + +userspace_tests() +{ + # userspace pm type prevents add_addr + 
if reset "userspace pm type prevents add_addr" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then + set_userspace_pm $ns1 + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + chk_add_nr 0 0 + fi + + # userspace pm type does not echo add_addr without daemon + if reset "userspace pm no echo w/o daemon" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then + set_userspace_pm $ns2 + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + chk_add_nr 1 0 + fi + + # userspace pm type rejects join + if reset "userspace pm type rejects join" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then + set_userspace_pm $ns1 + pm_nl_set_limits $ns1 1 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 0 + fi + + # userspace pm type does not send join + if reset "userspace pm type does not send join" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then + set_userspace_pm $ns2 + pm_nl_set_limits $ns1 1 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + fi + + # userspace pm type prevents mp_prio + if reset "userspace pm type prevents mp_prio" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then + set_userspace_pm $ns1 + pm_nl_set_limits $ns1 1 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + sflags=backup speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 0 + chk_prio_nr 0 0 + fi + + # userspace pm type prevents rm_addr + if reset "userspace pm type prevents rm_addr" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then + set_userspace_pm $ns1 + set_userspace_pm $ns2 + 
pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + addr_nr_ns2=-1 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + chk_rm_nr 0 0 + fi + + # userspace pm add & remove address + if reset_with_events "userspace pm add & remove address" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then + set_userspace_pm $ns1 + pm_nl_set_limits $ns2 2 2 + speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & + local tests_pid=$! + wait_mpj $ns1 + userspace_pm_add_addr $ns1 10.0.2.1 10 + userspace_pm_add_addr $ns1 10.0.3.1 20 + chk_join_nr 2 2 2 + chk_add_nr 2 2 + chk_mptcp_info subflows 2 subflows 2 + chk_subflows_total 3 3 + chk_mptcp_info add_addr_signal 2 add_addr_accepted 2 + userspace_pm_chk_dump_addr "${ns1}" \ + $'id 10 flags signal 10.0.2.1\nid 20 flags signal 10.0.3.1' \ + "signal" + userspace_pm_chk_get_addr "${ns1}" "10" "id 10 flags signal 10.0.2.1" + userspace_pm_chk_get_addr "${ns1}" "20" "id 20 flags signal 10.0.3.1" + userspace_pm_rm_addr $ns1 10 + userspace_pm_rm_sf $ns1 "::ffff:10.0.2.1" $MPTCP_LIB_EVENT_SUB_ESTABLISHED + userspace_pm_chk_dump_addr "${ns1}" \ + "id 20 flags signal 10.0.3.1" "after rm_addr 10" + userspace_pm_rm_addr $ns1 20 + userspace_pm_rm_sf $ns1 10.0.3.1 $MPTCP_LIB_EVENT_SUB_ESTABLISHED + userspace_pm_chk_dump_addr "${ns1}" "" "after rm_addr 20" + chk_rm_nr 2 2 invert + chk_mptcp_info subflows 0 subflows 0 + chk_subflows_total 1 1 + kill_events_pids + mptcp_lib_kill_wait $tests_pid + fi + + # userspace pm create destroy subflow + if reset_with_events "userspace pm create destroy subflow" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then + set_userspace_pm $ns2 + pm_nl_set_limits $ns1 0 1 + speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & + local tests_pid=$! 
+ wait_mpj $ns2 + userspace_pm_add_sf $ns2 10.0.3.2 20 + chk_join_nr 1 1 1 + chk_mptcp_info subflows 1 subflows 1 + chk_subflows_total 2 2 + userspace_pm_chk_dump_addr "${ns2}" \ + "id 20 flags subflow 10.0.3.2" \ + "subflow" + userspace_pm_chk_get_addr "${ns2}" "20" "id 20 flags subflow 10.0.3.2" + userspace_pm_rm_addr $ns2 20 + userspace_pm_rm_sf $ns2 10.0.3.2 $MPTCP_LIB_EVENT_SUB_ESTABLISHED + userspace_pm_chk_dump_addr "${ns2}" \ + "" \ + "after rm_addr 20" + chk_rm_nr 1 1 + chk_mptcp_info subflows 0 subflows 0 + chk_subflows_total 1 1 + kill_events_pids + mptcp_lib_kill_wait $tests_pid + fi + + # userspace pm create id 0 subflow + if reset_with_events "userspace pm create id 0 subflow" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then + set_userspace_pm $ns2 + pm_nl_set_limits $ns1 0 1 + speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & + local tests_pid=$! + wait_mpj $ns2 + chk_mptcp_info subflows 0 subflows 0 + chk_subflows_total 1 1 + userspace_pm_add_sf $ns2 10.0.3.2 0 + userspace_pm_chk_dump_addr "${ns2}" \ + "id 0 flags subflow 10.0.3.2" "id 0 subflow" + chk_join_nr 1 1 1 + chk_mptcp_info subflows 1 subflows 1 + chk_subflows_total 2 2 + kill_events_pids + mptcp_lib_kill_wait $tests_pid + fi + + # userspace pm remove initial subflow + if reset_with_events "userspace pm remove initial subflow" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then + set_userspace_pm $ns2 + pm_nl_set_limits $ns1 0 1 + speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & + local tests_pid=$! 
+ wait_mpj $ns2 + userspace_pm_add_sf $ns2 10.0.3.2 20 + chk_join_nr 1 1 1 + chk_mptcp_info subflows 1 subflows 1 + chk_subflows_total 2 2 + userspace_pm_rm_sf $ns2 10.0.1.2 + # we don't look at the counter linked to the RM_ADDR but + # to the one linked to the subflows that have been removed + chk_rm_nr 0 1 + chk_rst_nr 0 0 invert + chk_mptcp_info subflows 1 subflows 1 + chk_subflows_total 1 1 + kill_events_pids + mptcp_lib_kill_wait $tests_pid + fi + + # userspace pm send RM_ADDR for ID 0 + if reset_with_events "userspace pm send RM_ADDR for ID 0" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then + set_userspace_pm $ns1 + pm_nl_set_limits $ns2 1 1 + speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & + local tests_pid=$! + wait_mpj $ns1 + userspace_pm_add_addr $ns1 10.0.2.1 10 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + chk_mptcp_info subflows 1 subflows 1 + chk_subflows_total 2 2 + chk_mptcp_info add_addr_signal 1 add_addr_accepted 1 + userspace_pm_rm_addr $ns1 0 + # we don't look at the counter linked to the subflows that + # have been removed but to the one linked to the RM_ADDR + chk_rm_nr 1 0 invert + chk_rst_nr 0 0 invert + chk_mptcp_info subflows 1 subflows 1 + chk_subflows_total 1 1 + kill_events_pids + mptcp_lib_kill_wait $tests_pid + fi +} + +endpoint_tests() +{ + # subflow_rebuild_header is needed to support the implicit flag + # userspace pm type prevents add_addr + if reset "implicit EP" && + mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + pm_nl_set_limits $ns1 2 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 & + local tests_pid=$! 
+ + wait_mpj $ns1 + pm_nl_check_endpoint "creation" \ + $ns2 10.0.2.2 id 1 flags implicit + chk_mptcp_info subflows 1 subflows 1 + chk_mptcp_info add_addr_signal 1 add_addr_accepted 1 + + pm_nl_add_endpoint $ns2 10.0.2.2 id 33 2>/dev/null + pm_nl_check_endpoint "ID change is prevented" \ + $ns2 10.0.2.2 id 1 flags implicit + + pm_nl_add_endpoint $ns2 10.0.2.2 flags signal + pm_nl_check_endpoint "modif is allowed" \ + $ns2 10.0.2.2 id 1 flags signal + mptcp_lib_kill_wait $tests_pid + fi + + if reset "delete and re-add" && + mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + pm_nl_set_limits $ns1 1 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow + test_linkfail=4 speed=20 \ + run_tests $ns1 $ns2 10.0.1.1 & + local tests_pid=$! + + wait_mpj $ns2 + pm_nl_check_endpoint "creation" \ + $ns2 10.0.2.2 id 2 flags subflow dev ns2eth2 + chk_subflow_nr "before delete" 2 + chk_mptcp_info subflows 1 subflows 1 + + pm_nl_del_endpoint $ns2 2 10.0.2.2 + sleep 0.5 + chk_subflow_nr "after delete" 1 + chk_mptcp_info subflows 0 subflows 0 + + pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow + wait_mpj $ns2 + chk_subflow_nr "after re-add" 2 + chk_mptcp_info subflows 1 subflows 1 + mptcp_lib_kill_wait $tests_pid + fi +} + +# [$1: error message] +usage() +{ + if [ -n "${1}" ]; then + echo "${1}" + ret=${KSFT_FAIL} + fi + + echo "mptcp_join usage:" + + local key + for key in "${!all_tests[@]}"; do + echo " -${key} ${all_tests[${key}]}" + done + + echo " -c capture pcap files" + echo " -C enable data checksum" + echo " -i use ip mptcp" + echo " -h help" + + echo "[test ids|names]" + + exit ${ret} +} + + +# Use a "simple" array to force an specific order we cannot have with an associative one +all_tests_sorted=( + f@subflows_tests + e@subflows_error_tests + s@signal_address_tests + l@link_failure_tests + t@add_addr_timeout_tests + r@remove_tests + a@add_tests + 6@ipv6_tests + 4@v4mapped_tests + M@mixed_tests + b@backup_tests + 
p@add_addr_ports_tests + k@syncookies_tests + S@checksum_tests + d@deny_join_id0_tests + m@fullmesh_tests + z@fastclose_tests + F@fail_tests + u@userspace_tests + I@endpoint_tests +) + +all_tests_args="" +all_tests_names=() +for subtests in "${all_tests_sorted[@]}"; do + key="${subtests%@*}" + value="${subtests#*@}" + + all_tests_args+="${key}" + all_tests_names+=("${value}") + all_tests[${key}]="${value}" +done + +tests=() +while getopts "${all_tests_args}cCih" opt; do + case $opt in + ["${all_tests_args}"]) + tests+=("${all_tests[${opt}]}") + ;; + c) + capture=true + ;; + C) + checksum=true + ;; + i) + mptcp_lib_set_ip_mptcp + ;; + h) + usage + ;; + *) + usage "Unknown option: -${opt}" + ;; + esac +done + +shift $((OPTIND - 1)) + +for arg in "${@}"; do + if [[ "${arg}" =~ ^[0-9]+$ ]]; then + only_tests_ids+=("${arg}") + else + only_tests_names+=("${arg}") + fi +done + +if [ ${#tests[@]} -eq 0 ]; then + tests=("${all_tests_names[@]}") +fi + +for subtests in "${tests[@]}"; do + "${subtests}" +done + +if [ ${ret} -ne 0 ]; then + echo + echo "${#failed_tests[@]} failure(s) has(ve) been detected:" + for i in $(get_failed_tests_ids); do + echo -e "\t- ${i}: ${failed_tests[${i}]}" + done + echo +fi + +append_prev_results +mptcp_lib_result_print_all_tap exit $ret diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh new file mode 100644 index 000000000000..6ffa9b7a3260 --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -0,0 +1,668 @@ +#! 
#! /bin/bash
# SPDX-License-Identifier: GPL-2.0

# Shared helpers for the MPTCP selftests: kselftest exit codes, coloured
# printing, feature detection, TAP result bookkeeping and small parsers for
# the 'pm_nl_ctl events' output.

# kselftest exit codes
readonly KSFT_PASS=0
readonly KSFT_FAIL=1
readonly KSFT_SKIP=4

# Name used as prefix in the TAP output: MPTCP_LIB_KSFT_TEST if set, else the
# script name without its '.sh' suffix.
# shellcheck disable=SC2155 # declare and assign separately
readonly KSFT_TEST="${MPTCP_LIB_KSFT_TEST:-$(basename "${0}" .sh)}"

# These variables are used in some selftests, read-only
declare -rx MPTCP_LIB_EVENT_ANNOUNCED=6         # MPTCP_EVENT_ANNOUNCED
declare -rx MPTCP_LIB_EVENT_REMOVED=7           # MPTCP_EVENT_REMOVED
declare -rx MPTCP_LIB_EVENT_SUB_ESTABLISHED=10  # MPTCP_EVENT_SUB_ESTABLISHED
declare -rx MPTCP_LIB_EVENT_SUB_CLOSED=11       # MPTCP_EVENT_SUB_CLOSED
declare -rx MPTCP_LIB_EVENT_LISTENER_CREATED=15 # MPTCP_EVENT_LISTENER_CREATED
declare -rx MPTCP_LIB_EVENT_LISTENER_CLOSED=16  # MPTCP_EVENT_LISTENER_CLOSED

declare -rx MPTCP_LIB_AF_INET=2
declare -rx MPTCP_LIB_AF_INET6=10

# Mutable state shared by the helpers below
MPTCP_LIB_SUBTESTS=()
MPTCP_LIB_SUBTESTS_DUPLICATED=0
MPTCP_LIB_SUBTEST_FLAKY=0
MPTCP_LIB_TEST_COUNTER=0
MPTCP_LIB_TEST_FORMAT="%02u %-50s"
MPTCP_LIB_IP_MPTCP=0

# only if supported (or forced) and not disabled, see no-color.org
if { [ -t 1 ] || [ "${SELFTESTS_MPTCP_LIB_COLOR_FORCE:-}" = "1" ]; } &&
   [ "${NO_COLOR:-}" != "1" ]; then
	readonly MPTCP_LIB_COLOR_RED="\E[1;31m"
	readonly MPTCP_LIB_COLOR_GREEN="\E[1;32m"
	readonly MPTCP_LIB_COLOR_YELLOW="\E[1;33m"
	readonly MPTCP_LIB_COLOR_BLUE="\E[1;34m"
	readonly MPTCP_LIB_COLOR_RESET="\E[0m"
else
	readonly MPTCP_LIB_COLOR_RED=
	readonly MPTCP_LIB_COLOR_GREEN=
	readonly MPTCP_LIB_COLOR_YELLOW=
	readonly MPTCP_LIB_COLOR_BLUE=
	readonly MPTCP_LIB_COLOR_RESET=
fi

# SELFTESTS_MPTCP_LIB_OVERRIDE_FLAKY env var can be set not to ignore errors
# from subtests marked as flaky
mptcp_lib_override_flaky() {
	[ "${SELFTESTS_MPTCP_LIB_OVERRIDE_FLAKY:-}" = 1 ]
}

# Succeeds when the current subtest is marked as flaky (MPTCP_LIB_SUBTEST_FLAKY)
# and the override above is not set.
mptcp_lib_subtest_is_flaky() {
	[ "${MPTCP_LIB_SUBTEST_FLAKY}" = 1 ] && ! mptcp_lib_override_flaky
}

# $1: color, $2: text
# Prints all arguments (optionally prefixed by MPTCP_LIB_START_PRINT) followed
# by the colour reset sequence.
mptcp_lib_print_color() {
	echo -e "${MPTCP_LIB_START_PRINT:-}${*}${MPTCP_LIB_COLOR_RESET}"
}

mptcp_lib_print_ok() {
	mptcp_lib_print_color "${MPTCP_LIB_COLOR_GREEN}${*}"
}

mptcp_lib_print_warn() {
	mptcp_lib_print_color "${MPTCP_LIB_COLOR_YELLOW}${*}"
}

mptcp_lib_print_info() {
	mptcp_lib_print_color "${MPTCP_LIB_COLOR_BLUE}${*}"
}

mptcp_lib_print_err() {
	mptcp_lib_print_color "${MPTCP_LIB_COLOR_RED}${*}"
}

# [$@: optional text appended after the tag]
# shellcheck disable=SC2120 # parameters are optional
mptcp_lib_pr_ok() {
	mptcp_lib_print_ok "[ OK ]${1:+ ${*}}"
}

# [$@: optional text appended after the tag]
mptcp_lib_pr_skip() {
	mptcp_lib_print_warn "[SKIP]${1:+ ${*}}"
}

# [$@: optional text appended after the tag]
# The tag is '[IGNO] (flaky)' instead of '[FAIL]' for flaky subtests.
mptcp_lib_pr_fail() {
	# 'cmt' is explicitly initialised: it is expanded below even when the
	# subtest is not flaky, which would abort a caller using 'set -u'.
	local title cmt=""

	if mptcp_lib_subtest_is_flaky; then
		title="IGNO"
		cmt=" (flaky)"
	else
		title="FAIL"
	fi

	mptcp_lib_print_err "[${title}]${cmt}${1:+ ${*}}"
}

mptcp_lib_pr_info() {
	mptcp_lib_print_info "INFO: ${*}"
}

# SELFTESTS_MPTCP_LIB_EXPECT_ALL_FEATURES env var can be set when validating all
# features using the last version of the kernel and the selftests to make sure
# a test is not being skipped by mistake.
mptcp_lib_expect_all_features() {
	[ "${SELFTESTS_MPTCP_LIB_EXPECT_ALL_FEATURES:-}" = "1" ]
}

# $1: msg
# Exits with KSFT_FAIL when all features are expected, else returns 1 so the
# caller can report the feature as missing.
mptcp_lib_fail_if_expected_feature() {
	if mptcp_lib_expect_all_features; then
		echo "ERROR: missing feature: ${*}"
		exit ${KSFT_FAIL}
	fi

	return 1
}

# $1: file
mptcp_lib_has_file() {
	local f="${1}"

	if [ -f "${f}" ]; then
		return 0
	fi

	mptcp_lib_fail_if_expected_feature "${f} file not found"
}

# Exits with KSFT_SKIP when the MPTCP sysctl knob is not there.
mptcp_lib_check_mptcp() {
	if ! mptcp_lib_has_file "/proc/sys/net/mptcp/enabled"; then
		mptcp_lib_pr_skip "MPTCP support is not available"
		exit ${KSFT_SKIP}
	fi
}

# Exits with KSFT_SKIP when /proc/kallsyms is not there.
mptcp_lib_check_kallsyms() {
	if ! mptcp_lib_has_file "/proc/kallsyms"; then
		mptcp_lib_pr_skip "CONFIG_KALLSYMS is missing"
		exit ${KSFT_SKIP}
	fi
}

# Internal: use mptcp_lib_kallsyms_has() instead
__mptcp_lib_kallsyms_has() {
	local sym="${1}"

	mptcp_lib_check_kallsyms

	grep -q " ${sym}" /proc/kallsyms
}

# $1: part of a symbol to look at, add '$' at the end for full name
mptcp_lib_kallsyms_has() {
	local sym="${1}"

	if __mptcp_lib_kallsyms_has "${sym}"; then
		return 0
	fi

	mptcp_lib_fail_if_expected_feature "${sym} symbol not found"
}

# $1: part of a symbol to look at, add '$' at the end for full name
mptcp_lib_kallsyms_doesnt_have() {
	local sym="${1}"

	if ! __mptcp_lib_kallsyms_has "${sym}"; then
		return 0
	fi

	mptcp_lib_fail_if_expected_feature "${sym} symbol has been found"
}

# !!!AVOID USING THIS!!!
# Features might not land in the expected version and features can be backported
#
# $1: kernel version, e.g. 6.3
# Returns 0 when the running kernel (major.minor from 'uname -r') is at least
# the given version.
mptcp_lib_kversion_ge() {
	local exp_maj="${1%.*}"
	local exp_min="${1#*.}"
	local v maj min

	# If the kernel has backported features, set this env var to 1:
	if [ "${SELFTESTS_MPTCP_LIB_NO_KVERSION_CHECK:-}" = "1" ]; then
		return 0
	fi

	v=$(uname -r | cut -d'.' -f1,2)
	maj=${v%.*}
	min=${v#*.}

	if   [ "${maj}" -gt "${exp_maj}" ] ||
	   { [ "${maj}" -eq "${exp_maj}" ] && [ "${min}" -ge "${exp_min}" ]; }; then
		return 0
	fi

	# Here the running kernel (v) is the one that is too old, not the
	# expected version ($1).
	mptcp_lib_fail_if_expected_feature "kernel version ${v} lower than ${1}"
}

# $@: test name, possibly followed by a ' # <directive>' suffix
# Sets MPTCP_LIB_SUBTESTS_DUPLICATED when the same name was already recorded.
__mptcp_lib_result_check_duplicated() {
	local subtest

	for subtest in "${MPTCP_LIB_SUBTESTS[@]}"; do
		# the ' # ...' directive is not part of the name to compare
		if [[ "${subtest}" == *" - ${KSFT_TEST}: ${*%% #*}" ]]; then
			MPTCP_LIB_SUBTESTS_DUPLICATED=1
			mptcp_lib_print_err "Duplicated entry: ${*}"
			break
		fi
	done
}

# $1: TAP result ('ok' / 'not ok') ; $2+: test name
__mptcp_lib_result_add() {
	local result="${1}"
	shift

	local id=$((${#MPTCP_LIB_SUBTESTS[@]} + 1))

	__mptcp_lib_result_check_duplicated "${*}"

	MPTCP_LIB_SUBTESTS+=("${result} ${id} - ${KSFT_TEST}: ${*}")
}

# $1: test name
mptcp_lib_result_pass() {
	__mptcp_lib_result_add "ok" "${1}"
}

# $1: test name
mptcp_lib_result_fail() {
	if mptcp_lib_subtest_is_flaky; then
		# It might sound better to use 'not ok # TODO' or 'ok # SKIP',
		# but some CIs don't understand 'TODO' and treat SKIP as errors.
		__mptcp_lib_result_add "ok" "${1} # IGNORE Flaky"
	else
		__mptcp_lib_result_add "not ok" "${1}"
	fi
}

# $1: test name
mptcp_lib_result_skip() {
	__mptcp_lib_result_add "ok" "${1} # SKIP"
}

# $1: result code ; $2: test name
mptcp_lib_result_code() {
	local ret="${1}"
	local name="${2}"

	case "${ret}" in
	"${KSFT_PASS}")
		mptcp_lib_result_pass "${name}"
		;;
	"${KSFT_FAIL}")
		mptcp_lib_result_fail "${name}"
		;;
	"${KSFT_SKIP}")
		mptcp_lib_result_skip "${name}"
		;;
	*)
		echo "ERROR: wrong result code: ${ret}"
		exit ${KSFT_FAIL}
		;;
	esac
}

# Prints every recorded subtest in TAP 13 format, unless nothing was recorded
# or SELFTESTS_MPTCP_LIB_NO_TAP is set to 1. Duplicated entries are fatal when
# all features are expected.
mptcp_lib_result_print_all_tap() {
	local subtest

	if [ ${#MPTCP_LIB_SUBTESTS[@]} -eq 0 ] ||
	   [ "${SELFTESTS_MPTCP_LIB_NO_TAP:-}" = "1" ]; then
		return
	fi

	printf "\nTAP version 13\n"
	printf "1..%d\n" "${#MPTCP_LIB_SUBTESTS[@]}"

	for subtest in "${MPTCP_LIB_SUBTESTS[@]}"; do
		printf "%s\n" "${subtest}"
	done

	if [ "${MPTCP_LIB_SUBTESTS_DUPLICATED}" = 1 ] &&
	   mptcp_lib_expect_all_features; then
		mptcp_lib_print_err "Duplicated test entries"
		exit ${KSFT_FAIL}
	fi
}

# get the value of keyword $1 in the line marked by keyword $2
# Reads stdin; only the first line matching $2 is looked at.
mptcp_lib_get_info_value() {
	grep "${2}" | sed -n 's/.*\('"${1}"':\)\([0-9a-f:.]*\).*$/\2/p;q'
}

# $1: info name ; $2: evts_ns ; [$3: event type; [$4: addr]]
# The event type defaults to 1 and the address filter to "any line".
mptcp_lib_evts_get_info() {
	grep "${4:-}" "${2}" | mptcp_lib_get_info_value "${1}" "^type:${3:-1},"
}

# $1: PID
# A PID of 0 means "nothing to kill".
mptcp_lib_kill_wait() {
	[ "${1}" -eq 0 ] && return 0

	kill -SIGUSR1 "${1}" > /dev/null 2>&1
	kill "${1}" > /dev/null 2>&1
	wait "${1}" 2>/dev/null
}

# $1: IP address
# Succeeds when the address contains a ':'.
mptcp_lib_is_v6() {
	[ -z "${1##*:*}" ]
}

# $1: ns, $2: MIB counter
# Prints the counter value as reported by nstat; returns 1 (or exits, when all
# features are expected) if nstat does not report it.
mptcp_lib_get_counter() {
	local ns="${1}"
	local counter="${2}"
	local count

	count=$(ip netns exec "${ns}" nstat -asz "${counter}" |
		awk 'NR==1 {next} {print $2}')
	if [ -z "${count}" ]; then
		mptcp_lib_fail_if_expected_feature "${counter} counter"
		return 1
	fi

	echo "${count}"
}

# $1: file name ; $2: block size ; $3: number of blocks
# Creates a file made of random data followed by an end marker.
mptcp_lib_make_file() {
	local name="${1}"
	local bs="${2}"
	local size="${3}"

	dd if=/dev/urandom of="${name}" bs="${bs}" count="${size}" 2> /dev/null
	echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "${name}"
}

# $1: file
mptcp_lib_print_file_err() {
	ls -l "${1}" 1>&2
	echo "Trailing bytes are: "
	tail -c 27 "${1}"
}

# $1: input file ; $2: output file ; $3: what kind of file
# Returns 1, after printing details about both files, when they differ.
mptcp_lib_check_transfer() {
	local in="${1}"
	local out="${2}"
	local what="${3}"

	if ! cmp "$in" "$out" > /dev/null 2>&1; then
		mptcp_lib_pr_fail "$what does not match (in, out):"
		mptcp_lib_print_file_err "$in"
		mptcp_lib_print_file_err "$out"

		return 1
	fi

	return 0
}

# $1: ns, $2: port
# Polls /proc/net/tcp* up to 10 times, 0.1s apart, until a socket bound to the
# given local port is in state 0A.
mptcp_lib_wait_local_port_listen() {
	local listener_ns="${1}"
	local port="${2}"

	local port_hex
	port_hex="$(printf "%04X" "${port}")"

	local _
	for _ in $(seq 10); do
		ip netns exec "${listener_ns}" cat /proc/net/tcp* | \
			awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) \
			     {rc=0; exit}} END {exit rc}" &&
			break
		sleep 0.1
	done
}

# $1: file receiving the command's stderr ; $2: command ; $3: expected stdout
# Returns 0 when the output matches, 1 when it differs and 2 when the command
# itself failed (its stderr is then printed).
mptcp_lib_check_output() {
	local err="${1}"
	local cmd="${2}"
	local expected="${3}"
	local cmd_ret=0
	local out

	# The status has to be captured with '||': in the body of
	# 'if ! cmd; then', $? is the (always 0) status of the negation, not
	# the one of the command. ${cmd} is not quoted on purpose: it holds a
	# command and its arguments.
	# shellcheck disable=SC2086
	out=$(${cmd} 2>"${err}") || cmd_ret=${?}

	if [ ${cmd_ret} -ne 0 ]; then
		mptcp_lib_pr_fail "command execution '${cmd}' stderr"
		cat "${err}"
		return 2
	elif [ "${out}" = "${expected}" ]; then
		return 0
	else
		mptcp_lib_pr_fail "expected '${expected}' got '${out}'"
		return 1
	fi
}

# $@: tools to check: ip, tc, ss, iptables*, ip6tables*
# Exits with KSFT_SKIP when one of them cannot be used, and with KSFT_FAIL for
# a tool this helper does not know about.
mptcp_lib_check_tools() {
	local tool

	for tool in "${@}"; do
		case "${tool}" in
		"ip")
			if ! ip -Version &> /dev/null; then
				mptcp_lib_pr_skip "Could not run test without ip tool"
				exit ${KSFT_SKIP}
			fi
			;;
		"tc")
			if ! tc -help &> /dev/null; then
				mptcp_lib_pr_skip "Could not run test without tc tool"
				exit ${KSFT_SKIP}
			fi
			;;
		"ss")
			if ! ss -h | grep -q MPTCP; then
				mptcp_lib_pr_skip "ss tool does not support MPTCP"
				exit ${KSFT_SKIP}
			fi
			;;
		"iptables"* | "ip6tables"*)
			if ! "${tool}" -V &> /dev/null; then
				mptcp_lib_pr_skip "Could not run all tests without ${tool}"
				exit ${KSFT_SKIP}
			fi
			;;
		*)
			mptcp_lib_pr_fail "Internal error: unsupported tool: ${tool}"
			exit ${KSFT_FAIL}
			;;
		esac
	done
}

# $@: names of the variables that will hold the netns names
# Each variable is set to '<var name>-<hex time>-<random suffix>' and the
# corresponding netns is created with MPTCP enabled and rp_filter disabled.
mptcp_lib_ns_init() {
	local sec rndh

	sec=$(date +%s)
	rndh=$(printf %x "${sec}")-$(mktemp -u XXXXXX)

	local netns
	for netns in "${@}"; do
		# assign to the variable named by ${netns}, without 'eval'
		printf -v "${netns}" '%s' "${netns}-${rndh}"

		ip netns add "${!netns}" || exit ${KSFT_SKIP}
		ip -net "${!netns}" link set lo up
		ip netns exec "${!netns}" sysctl -q net.mptcp.enabled=1
		ip netns exec "${!netns}" sysctl -q net.ipv4.conf.all.rp_filter=0
		ip netns exec "${!netns}" sysctl -q net.ipv4.conf.default.rp_filter=0
	done
}

# $@: netns names (not variable names)
mptcp_lib_ns_exit() {
	local netns
	for netns in "${@}"; do
		ip netns del "${netns}"
		rm -f /tmp/"${netns}".{nstat,out}
	done
}

# $1: ns ; $2: events file ; $3: name of the variable holding the monitor PID
# Truncates the events file, stops the previous monitor (if any) and starts a
# new 'pm_nl_ctl events' in the background, storing its PID in the variable
# named by $3.
mptcp_lib_events() {
	local ns="${1}"
	local evts="${2}"
	declare -n pid="${3}"

	:>"${evts}"

	mptcp_lib_kill_wait "${pid:-0}"
	ip netns exec "${ns}" ./pm_nl_ctl events >> "${evts}" 2>&1 &
	pid=$!
}

# $@: title
# Increments MPTCP_LIB_TEST_COUNTER and prints it with the title, without a
# trailing newline, using MPTCP_LIB_TEST_FORMAT.
mptcp_lib_print_title() {
	: "${MPTCP_LIB_TEST_COUNTER:?}"
	: "${MPTCP_LIB_TEST_FORMAT:?}"

	# shellcheck disable=SC2059 # the format is in a variable
	printf "${MPTCP_LIB_TEST_FORMAT}" "$((++MPTCP_LIB_TEST_COUNTER))" "${*}"
}

# $1: var name ; $2: prev ret
# Compares the variable named $1 with the one named e_$1. The failure tag is
# only printed for the first mismatch (prev ret still 0).
mptcp_lib_check_expected_one() {
	local var="${1}"
	local exp="e_${var}"
	local prev_ret="${2}"

	if [ "${!var}" = "${!exp}" ]; then
		return 0
	fi

	if [ "${prev_ret}" = "0" ]; then
		mptcp_lib_pr_fail
	fi

	mptcp_lib_print_err "Expected value for '${var}': '${!exp}', got '${!var}'."
	return 1
}

# $@: all var names to check
mptcp_lib_check_expected() {
	local rc=0
	local var

	for var in "${@}"; do
		mptcp_lib_check_expected_one "${var}" "${rc}" || rc=1
	done

	return "${rc}"
}

# $1: events file ; $2: expected event type ; $3: expected family ;
# $4: expected source address ; $5: expected source port
# shellcheck disable=SC2034 # Some variables are used below but indirectly
mptcp_lib_verify_listener_events() {
	local evt=${1}
	local e_type=${2}
	local e_family=${3}
	local e_saddr=${4}
	local e_sport=${5}
	local type
	local family
	local saddr
	local sport
	local rc=0

	type=$(mptcp_lib_evts_get_info type "${evt}" "${e_type}")
	family=$(mptcp_lib_evts_get_info family "${evt}" "${e_type}")
	# The constant exported by this library is MPTCP_LIB_AF_INET6: with a
	# bare (never defined here) AF_INET6, the IPv6 branch was never taken.
	if [ "${family}" ] && [ "${family}" = "${MPTCP_LIB_AF_INET6}" ]; then
		saddr=$(mptcp_lib_evts_get_info saddr6 "${evt}" "${e_type}")
	else
		saddr=$(mptcp_lib_evts_get_info saddr4 "${evt}" "${e_type}")
	fi
	sport=$(mptcp_lib_evts_get_info sport "${evt}" "${e_type}")

	mptcp_lib_check_expected "type" "family" "saddr" "sport" || rc="${?}"
	return "${rc}"
}

# Switch the mptcp_lib_pm_nl_*() helpers from './pm_nl_ctl' to 'ip mptcp'.
mptcp_lib_set_ip_mptcp() {
	MPTCP_LIB_IP_MPTCP=1
}

mptcp_lib_is_ip_mptcp() {
	[ "${MPTCP_LIB_IP_MPTCP}" = "1" ]
}

# format: <id>,<ip>,<flags>,<dev>,<port>
# Prints one line per entry, laid out for 'ip mptcp' or for 'pm_nl_ctl'
# depending on mptcp_lib_is_ip_mptcp().
mptcp_lib_pm_nl_format_endpoints() {
	local entry id ip flags dev port

	for entry in "${@}"; do
		IFS=, read -r id ip flags dev port <<< "${entry}"
		if mptcp_lib_is_ip_mptcp; then
			echo -n "${ip}"
			[ -n "${port}" ] && echo -n " port ${port}"
			echo -n " id ${id}"
			[ -n "${flags}" ] && echo -n " ${flags}"
			[ -n "${dev}" ] && echo -n " dev ${dev}"
			echo " " # always a space at the end
		else
			echo -n "id ${id}"
			echo -n " flags ${flags//" "/","}"
			[ -n "${dev}" ] && echo -n " dev ${dev}"
			echo -n " ${ip}"
			[ -n "${port}" ] && echo -n " ${port}"
			echo ""
		fi
	done
}

# $1: ns ; $2: endpoint id
mptcp_lib_pm_nl_get_endpoint() {
	local ns=${1}
	local id=${2}

	if mptcp_lib_is_ip_mptcp; then
		ip -n "${ns}" mptcp endpoint show id "${id}"
	else
		ip netns exec "${ns}" ./pm_nl_ctl get "${id}"
	fi
}

# $1: ns ; $2: max accepted ADD_ADDR ; $3: max subflows
mptcp_lib_pm_nl_set_limits() {
	local ns=${1}
	local addrs=${2}
	local subflows=${3}

	if mptcp_lib_is_ip_mptcp; then
		ip -n "${ns}" mptcp limits set add_addr_accepted "${addrs}" subflows "${subflows}"
	else
		ip netns exec "${ns}" ./pm_nl_ctl limits "${addrs}" "${subflows}"
	fi
}

# $1: ns ; $2: address ; then any of: flags <f1[,f2...]>, dev <name>, id <id>,
# port <port>, in any order
mptcp_lib_pm_nl_add_endpoint() {
	local ns=${1}
	local addr=${2}
	local flags dev id port
	local nr=2

	local p
	for p in "${@}"; do
		case "${p}" in
		"flags" | "dev" | "id" | "port")
			# ${nr} is the position of the argument following the
			# keyword. Indirect expansion is used instead of
			# 'eval kw=$<nr>': the latter reads "$10" as "${1}0"
			# once the value sits at position 10 or above.
			printf -v "${p}" '%s' "${!nr}"
			;;
		esac

		nr=$((nr + 1))
	done

	if mptcp_lib_is_ip_mptcp; then
		# shellcheck disable=SC2086 # blanks in flags, no double quote
		ip -n "${ns}" mptcp endpoint add "${addr}" ${flags//","/" "} \
			${dev:+dev "${dev}"} ${id:+id "${id}"} ${port:+port "${port}"}
	else
		ip netns exec "${ns}" ./pm_nl_ctl add "${addr}" ${flags:+flags "${flags}"} \
			${dev:+dev "${dev}"} ${id:+id "${id}"} ${port:+port "${port}"}
	fi
}

# $1: ns ; $2: endpoint id ; [$3: address]
# With 'ip mptcp', the address is only passed along with id 0.
mptcp_lib_pm_nl_del_endpoint() {
	local ns=${1}
	local id=${2}
	local addr=${3}

	if mptcp_lib_is_ip_mptcp; then
		[ "${id}" -ne 0 ] && addr=''
		ip -n "${ns}" mptcp endpoint delete id "${id}" ${addr:+"${addr}"}
	else
		ip netns exec "${ns}" ./pm_nl_ctl del "${id}" "${addr}"
	fi
}

# $1: ns
mptcp_lib_pm_nl_flush_endpoint() {
	local ns=${1}

	if mptcp_lib_is_ip_mptcp; then
		ip -n "${ns}" mptcp endpoint flush
	else
		ip netns exec "${ns}" ./pm_nl_ctl flush
	fi
}

# $1: ns
mptcp_lib_pm_nl_show_endpoints() {
	local ns=${1}

	if mptcp_lib_is_ip_mptcp; then
		ip -n "${ns}" mptcp endpoint show
	else
		ip netns exec "${ns}" ./pm_nl_ctl dump
	fi
}

# $1: ns ; $2: endpoint id ; $3: comma separated flags
mptcp_lib_pm_nl_change_endpoint() {
	local ns=${1}
	local id=${2}
	local flags=${3}

	if mptcp_lib_is_ip_mptcp; then
		# shellcheck disable=SC2086 # blanks in flags, no double quote
		ip -n "${ns}" mptcp endpoint change id "${id}" ${flags//","/" "}
	else
		ip netns exec "${ns}" ./pm_nl_ctl set id "${id}" flags "${flags}"
	fi
}
# End of mptcp_lib.sh. Patch header of the next file, kept as a comment:
# diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
new file mode 100644 index 000000000000..926b0be87c99 --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c @@ -0,0 +1,866 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE + +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <string.h> +#include <stdarg.h> +#include <stdbool.h> +#include <stdint.h> +#include <inttypes.h> +#include <stdio.h> +#include <stdlib.h> +#include <strings.h> +#include <time.h> +#include <unistd.h> + +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/wait.h> + +#include <netdb.h> +#include <netinet/in.h> + +#include <linux/tcp.h> + +static int pf = AF_INET; + +#ifndef IPPROTO_MPTCP +#define IPPROTO_MPTCP 262 +#endif +#ifndef SOL_MPTCP +#define SOL_MPTCP 284 +#endif + +#ifndef MPTCP_INFO +struct mptcp_info { + __u8 mptcpi_subflows; + __u8 mptcpi_add_addr_signal; + __u8 mptcpi_add_addr_accepted; + __u8 mptcpi_subflows_max; + __u8 mptcpi_add_addr_signal_max; + __u8 mptcpi_add_addr_accepted_max; + __u32 mptcpi_flags; + __u32 mptcpi_token; + __u64 mptcpi_write_seq; + __u64 mptcpi_snd_una; + __u64 mptcpi_rcv_nxt; + __u8 mptcpi_local_addr_used; + __u8 mptcpi_local_addr_max; + __u8 mptcpi_csum_enabled; + __u32 mptcpi_retransmits; + __u64 mptcpi_bytes_retrans; + __u64 mptcpi_bytes_sent; + __u64 mptcpi_bytes_received; + __u64 mptcpi_bytes_acked; +}; + +struct mptcp_subflow_data { + __u32 size_subflow_data; /* size of this structure in userspace */ + __u32 num_subflows; /* must be 0, set by kernel */ + __u32 size_kernel; /* must be 0, set by kernel */ + __u32 size_user; /* size of one element in data[] */ +} __attribute__((aligned(8))); + +struct mptcp_subflow_addrs { + union { + __kernel_sa_family_t sa_family; + struct sockaddr sa_local; + struct sockaddr_in sin_local; + struct sockaddr_in6 sin6_local; + struct __kernel_sockaddr_storage ss_local; + }; + union { + struct sockaddr sa_remote; + struct sockaddr_in sin_remote; + struct sockaddr_in6 sin6_remote; + struct 
__kernel_sockaddr_storage ss_remote; + }; +}; + +#define MPTCP_INFO 1 +#define MPTCP_TCPINFO 2 +#define MPTCP_SUBFLOW_ADDRS 3 +#endif + +#ifndef MPTCP_FULL_INFO +struct mptcp_subflow_info { + __u32 id; + struct mptcp_subflow_addrs addrs; +}; + +struct mptcp_full_info { + __u32 size_tcpinfo_kernel; /* must be 0, set by kernel */ + __u32 size_tcpinfo_user; + __u32 size_sfinfo_kernel; /* must be 0, set by kernel */ + __u32 size_sfinfo_user; + __u32 num_subflows; /* must be 0, set by kernel (real subflow count) */ + __u32 size_arrays_user; /* max subflows that userspace is interested in; + * the buffers at subflow_info/tcp_info + * are respectively at least: + * size_arrays * size_sfinfo_user + * size_arrays * size_tcpinfo_user + * bytes wide + */ + __aligned_u64 subflow_info; + __aligned_u64 tcp_info; + struct mptcp_info mptcp_info; +}; + +#define MPTCP_FULL_INFO 4 +#endif + +struct so_state { + struct mptcp_info mi; + struct mptcp_info last_sample; + struct tcp_info tcp_info; + struct mptcp_subflow_addrs addrs; + uint64_t mptcpi_rcv_delta; + uint64_t tcpi_rcv_delta; + bool pkt_stats_avail; +}; + +#ifndef MIN +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#endif + +static void die_perror(const char *msg) +{ + perror(msg); + exit(1); +} + +static void die_usage(int r) +{ + fprintf(stderr, "Usage: mptcp_sockopt [-6]\n"); + exit(r); +} + +static void xerror(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + fputc('\n', stderr); + exit(1); +} + +static const char *getxinfo_strerr(int err) +{ + if (err == EAI_SYSTEM) + return strerror(errno); + + return gai_strerror(err); +} + +static void xgetaddrinfo(const char *node, const char *service, + const struct addrinfo *hints, + struct addrinfo **res) +{ + int err = getaddrinfo(node, service, hints, res); + + if (err) { + const char *errstr = getxinfo_strerr(err); + + fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n", + node ? node : "", service ? 
/*
 * Resolve remoteaddr:port for the address family selected by 'pf' and return
 * a socket created with protocol 'proto' that is connected to it.
 *
 * Every address returned by the resolver is tried in turn: a failed connect()
 * is reported with perror() and its socket is closed before moving on to the
 * next candidate, mirroring what sock_listen_mptcp() does for a failed bind().
 * If no address could be connected, the process exits through xerror().
 */
static int sock_connect_mptcp(const char * const remoteaddr,
			      const char * const port, int proto)
{
	struct addrinfo hints = {
		.ai_protocol = IPPROTO_TCP,
		.ai_socktype = SOCK_STREAM,
	};
	struct addrinfo *a, *addr;
	int sock = -1;

	hints.ai_family = pf;

	xgetaddrinfo(remoteaddr, port, &hints, &addr);
	for (a = addr; a; a = a->ai_next) {
		sock = socket(a->ai_family, a->ai_socktype, proto);
		if (sock < 0)
			continue;

		if (connect(sock, a->ai_addr, a->ai_addrlen) == 0)
			break; /* success */

		/* do not leak the fd, and give the next address a chance */
		perror("connect");
		close(sock);
		sock = -1;
	}

	freeaddrinfo(addr);

	if (sock < 0)
		xerror("could not create connect socket");

	return sock;
}
mptcp_subflow_data d; + char buf[2]; + } bd; + socklen_t olen, _olen; + int ret; + + memset(&bd, 0, sizeof(bd)); + memset(&good_data, 0, sizeof(good_data)); + + olen = sizeof(good_data); + good_data.size_subflow_data = olen; + + ret = getsockopt(fd, SOL_MPTCP, optname, &bd, &olen); + assert(ret < 0); /* 0 size_subflow_data */ + assert(olen == sizeof(good_data)); + + bd.d = good_data; + + ret = getsockopt(fd, SOL_MPTCP, optname, &bd, &olen); + assert(ret == 0); + assert(olen == sizeof(good_data)); + assert(bd.d.num_subflows == 1); + assert(bd.d.size_kernel > 0); + assert(bd.d.size_user == 0); + + bd.d = good_data; + _olen = rand() % olen; + olen = _olen; + ret = getsockopt(fd, SOL_MPTCP, optname, &bd, &olen); + assert(ret < 0); /* bogus olen */ + assert(olen == _olen); /* must be unchanged */ + + bd.d = good_data; + olen = sizeof(good_data); + bd.d.size_kernel = 1; + ret = getsockopt(fd, SOL_MPTCP, optname, &bd, &olen); + assert(ret < 0); /* size_kernel not 0 */ + + bd.d = good_data; + olen = sizeof(good_data); + bd.d.num_subflows = 1; + ret = getsockopt(fd, SOL_MPTCP, optname, &bd, &olen); + assert(ret < 0); /* num_subflows not 0 */ + + /* forward compat check: larger struct mptcp_subflow_data on 'old' kernel */ + bd.d = good_data; + olen = sizeof(bd); + bd.d.size_subflow_data = sizeof(bd); + + ret = getsockopt(fd, SOL_MPTCP, optname, &bd, &olen); + assert(ret == 0); + + /* olen must be truncated to real data size filled by kernel: */ + assert(olen == sizeof(good_data)); + + assert(bd.d.size_subflow_data == sizeof(bd)); + + bd.d = good_data; + bd.d.size_subflow_data += 1; + bd.d.size_user = 1; + olen = bd.d.size_subflow_data + 1; + _olen = olen; + + ret = getsockopt(fd, SOL_MPTCP, optname, &bd, &_olen); + assert(ret == 0); + + /* no truncation, kernel should have filled 1 byte of optname payload in buf[1]: */ + assert(olen == _olen); + + assert(bd.d.size_subflow_data == sizeof(good_data) + 1); + assert(bd.buf[0] == 0); +} + +static void 
do_getsockopt_mptcp_info(struct so_state *s, int fd, size_t w) +{ + struct mptcp_info i; + socklen_t olen; + int ret; + + olen = sizeof(i); + ret = getsockopt(fd, SOL_MPTCP, MPTCP_INFO, &i, &olen); + + if (ret < 0) + die_perror("getsockopt MPTCP_INFO"); + + s->pkt_stats_avail = olen >= sizeof(i); + + s->last_sample = i; + if (s->mi.mptcpi_write_seq == 0) + s->mi = i; + + assert(s->mi.mptcpi_write_seq + w == i.mptcpi_write_seq); + + s->mptcpi_rcv_delta = i.mptcpi_rcv_nxt - s->mi.mptcpi_rcv_nxt; +} + +static void do_getsockopt_tcp_info(struct so_state *s, int fd, size_t r, size_t w) +{ + struct my_tcp_info { + struct mptcp_subflow_data d; + struct tcp_info ti[2]; + } ti; + int ret, tries = 5; + socklen_t olen; + + do { + memset(&ti, 0, sizeof(ti)); + + ti.d.size_subflow_data = sizeof(struct mptcp_subflow_data); + ti.d.size_user = sizeof(struct tcp_info); + olen = sizeof(ti); + + ret = getsockopt(fd, SOL_MPTCP, MPTCP_TCPINFO, &ti, &olen); + if (ret < 0) + xerror("getsockopt MPTCP_TCPINFO (tries %d, %m)"); + + assert(olen <= sizeof(ti)); + assert(ti.d.size_kernel > 0); + assert(ti.d.size_user == + MIN(ti.d.size_kernel, sizeof(struct tcp_info))); + assert(ti.d.num_subflows == 1); + + assert(olen > (socklen_t)sizeof(struct mptcp_subflow_data)); + olen -= sizeof(struct mptcp_subflow_data); + assert(olen == ti.d.size_user); + + s->tcp_info = ti.ti[0]; + + if (ti.ti[0].tcpi_bytes_sent == w && + ti.ti[0].tcpi_bytes_received == r) + goto done; + + if (r == 0 && ti.ti[0].tcpi_bytes_sent == w && + ti.ti[0].tcpi_bytes_received) { + s->tcpi_rcv_delta = ti.ti[0].tcpi_bytes_received; + goto done; + } + + /* wait and repeat, might be that tx is still ongoing */ + sleep(1); + } while (tries-- > 0); + + xerror("tcpi_bytes_sent %" PRIu64 ", want %zu. 
tcpi_bytes_received %" PRIu64 ", want %zu", + ti.ti[0].tcpi_bytes_sent, w, ti.ti[0].tcpi_bytes_received, r); + +done: + do_getsockopt_bogus_sf_data(fd, MPTCP_TCPINFO); +} + +static void do_getsockopt_subflow_addrs(struct so_state *s, int fd) +{ + struct sockaddr_storage remote, local; + socklen_t olen, rlen, llen; + int ret; + struct my_addrs { + struct mptcp_subflow_data d; + struct mptcp_subflow_addrs addr[2]; + } addrs; + + memset(&addrs, 0, sizeof(addrs)); + memset(&local, 0, sizeof(local)); + memset(&remote, 0, sizeof(remote)); + + addrs.d.size_subflow_data = sizeof(struct mptcp_subflow_data); + addrs.d.size_user = sizeof(struct mptcp_subflow_addrs); + olen = sizeof(addrs); + + ret = getsockopt(fd, SOL_MPTCP, MPTCP_SUBFLOW_ADDRS, &addrs, &olen); + if (ret < 0) + die_perror("getsockopt MPTCP_SUBFLOW_ADDRS"); + + assert(olen <= sizeof(addrs)); + assert(addrs.d.size_kernel > 0); + assert(addrs.d.size_user == + MIN(addrs.d.size_kernel, sizeof(struct mptcp_subflow_addrs))); + assert(addrs.d.num_subflows == 1); + + assert(olen > (socklen_t)sizeof(struct mptcp_subflow_data)); + olen -= sizeof(struct mptcp_subflow_data); + assert(olen == addrs.d.size_user); + + llen = sizeof(local); + ret = getsockname(fd, (struct sockaddr *)&local, &llen); + if (ret < 0) + die_perror("getsockname"); + rlen = sizeof(remote); + ret = getpeername(fd, (struct sockaddr *)&remote, &rlen); + if (ret < 0) + die_perror("getpeername"); + + assert(rlen > 0); + assert(rlen == llen); + + assert(remote.ss_family == local.ss_family); + + assert(memcmp(&local, &addrs.addr[0].ss_local, sizeof(local)) == 0); + assert(memcmp(&remote, &addrs.addr[0].ss_remote, sizeof(remote)) == 0); + s->addrs = addrs.addr[0]; + + memset(&addrs, 0, sizeof(addrs)); + + addrs.d.size_subflow_data = sizeof(struct mptcp_subflow_data); + addrs.d.size_user = sizeof(sa_family_t); + olen = sizeof(addrs.d) + sizeof(sa_family_t); + + ret = getsockopt(fd, SOL_MPTCP, MPTCP_SUBFLOW_ADDRS, &addrs, &olen); + assert(ret == 0); + 
assert(olen == sizeof(addrs.d) + sizeof(sa_family_t)); + + assert(addrs.addr[0].sa_family == pf); + assert(addrs.addr[0].sa_family == local.ss_family); + + assert(memcmp(&local, &addrs.addr[0].ss_local, sizeof(local)) != 0); + assert(memcmp(&remote, &addrs.addr[0].ss_remote, sizeof(remote)) != 0); + + do_getsockopt_bogus_sf_data(fd, MPTCP_SUBFLOW_ADDRS); +} + +static void do_getsockopt_mptcp_full_info(struct so_state *s, int fd) +{ + size_t data_size = sizeof(struct mptcp_full_info); + struct mptcp_subflow_info sfinfo[2]; + struct tcp_info tcp_info[2]; + struct mptcp_full_info mfi; + socklen_t olen; + int ret; + + memset(&mfi, 0, data_size); + memset(tcp_info, 0, sizeof(tcp_info)); + memset(sfinfo, 0, sizeof(sfinfo)); + + mfi.size_tcpinfo_user = sizeof(struct tcp_info); + mfi.size_sfinfo_user = sizeof(struct mptcp_subflow_info); + mfi.size_arrays_user = 2; + mfi.subflow_info = (unsigned long)&sfinfo[0]; + mfi.tcp_info = (unsigned long)&tcp_info[0]; + olen = data_size; + + ret = getsockopt(fd, SOL_MPTCP, MPTCP_FULL_INFO, &mfi, &olen); + if (ret < 0) { + if (errno == EOPNOTSUPP) { + perror("MPTCP_FULL_INFO test skipped"); + return; + } + xerror("getsockopt MPTCP_FULL_INFO"); + } + + assert(olen <= data_size); + assert(mfi.size_tcpinfo_kernel > 0); + assert(mfi.size_tcpinfo_user == + MIN(mfi.size_tcpinfo_kernel, sizeof(struct tcp_info))); + assert(mfi.size_sfinfo_kernel > 0); + assert(mfi.size_sfinfo_user == + MIN(mfi.size_sfinfo_kernel, sizeof(struct mptcp_subflow_info))); + assert(mfi.num_subflows == 1); + + /* Tolerate future extension to mptcp_info struct and running newer + * test on top of older kernel. + * Anyway any kernel supporting MPTCP_FULL_INFO must at least include + * the following in mptcp_info. 
+ */ + assert(olen > (socklen_t)__builtin_offsetof(struct mptcp_full_info, tcp_info)); + assert(mfi.mptcp_info.mptcpi_subflows == 0); + assert(mfi.mptcp_info.mptcpi_bytes_sent == s->last_sample.mptcpi_bytes_sent); + assert(mfi.mptcp_info.mptcpi_bytes_received == s->last_sample.mptcpi_bytes_received); + + assert(sfinfo[0].id == 1); + assert(tcp_info[0].tcpi_bytes_sent == s->tcp_info.tcpi_bytes_sent); + assert(tcp_info[0].tcpi_bytes_received == s->tcp_info.tcpi_bytes_received); + assert(!memcmp(&sfinfo->addrs, &s->addrs, sizeof(struct mptcp_subflow_addrs))); +} + +static void do_getsockopts(struct so_state *s, int fd, size_t r, size_t w) +{ + do_getsockopt_mptcp_info(s, fd, w); + + do_getsockopt_tcp_info(s, fd, r, w); + + do_getsockopt_subflow_addrs(s, fd); + + if (r) + do_getsockopt_mptcp_full_info(s, fd); +} + +static void connect_one_server(int fd, int pipefd) +{ + char buf[4096], buf2[4096]; + size_t len, i, total; + struct so_state s; + bool eof = false; + ssize_t ret; + + memset(&s, 0, sizeof(s)); + + len = rand() % (sizeof(buf) - 1); + + if (len < 128) + len = 128; + + for (i = 0; i < len ; i++) { + buf[i] = rand() % 26; + buf[i] += 'A'; + } + + buf[i] = '\n'; + + do_getsockopts(&s, fd, 0, 0); + + /* un-block server */ + ret = read(pipefd, buf2, 4); + assert(ret == 4); + close(pipefd); + + assert(strncmp(buf2, "xmit", 4) == 0); + + ret = write(fd, buf, len); + if (ret < 0) + die_perror("write"); + + if (ret != (ssize_t)len) + xerror("short write"); + + total = 0; + do { + ret = read(fd, buf2 + total, sizeof(buf2) - total); + if (ret < 0) + die_perror("read"); + if (ret == 0) { + eof = true; + break; + } + + total += ret; + } while (total < len); + + if (total != len) + xerror("total %lu, len %lu eof %d\n", total, len, eof); + + if (memcmp(buf, buf2, len)) + xerror("data corruption"); + + if (s.tcpi_rcv_delta) + assert(s.tcpi_rcv_delta <= total); + + do_getsockopts(&s, fd, ret, ret); + + if (eof) + total += 1; /* sequence advances due to FIN */ + + 
assert(s.mptcpi_rcv_delta == (uint64_t)total); + close(fd); +} + +static void process_one_client(int fd, int pipefd) +{ + ssize_t ret, ret2, ret3; + struct so_state s; + char buf[4096]; + + memset(&s, 0, sizeof(s)); + do_getsockopts(&s, fd, 0, 0); + + ret = write(pipefd, "xmit", 4); + assert(ret == 4); + + ret = read(fd, buf, sizeof(buf)); + if (ret < 0) + die_perror("read"); + + assert(s.mptcpi_rcv_delta <= (uint64_t)ret); + + if (s.tcpi_rcv_delta) + assert(s.tcpi_rcv_delta == (uint64_t)ret); + + ret2 = write(fd, buf, ret); + if (ret2 < 0) + die_perror("write"); + + /* wait for hangup */ + ret3 = read(fd, buf, 1); + if (ret3 != 0) + xerror("expected EOF, got %lu", ret3); + + do_getsockopts(&s, fd, ret, ret2); + if (s.mptcpi_rcv_delta != (uint64_t)ret + 1) + xerror("mptcpi_rcv_delta %" PRIu64 ", expect %" PRIu64, s.mptcpi_rcv_delta, ret + 1, s.mptcpi_rcv_delta - ret); + + /* be nice when running on top of older kernel */ + if (s.pkt_stats_avail) { + if (s.last_sample.mptcpi_bytes_sent != ret2) + xerror("mptcpi_bytes_sent %" PRIu64 ", expect %" PRIu64, + s.last_sample.mptcpi_bytes_sent, ret2, + s.last_sample.mptcpi_bytes_sent - ret2); + if (s.last_sample.mptcpi_bytes_received != ret) + xerror("mptcpi_bytes_received %" PRIu64 ", expect %" PRIu64, + s.last_sample.mptcpi_bytes_received, ret, + s.last_sample.mptcpi_bytes_received - ret); + if (s.last_sample.mptcpi_bytes_acked != ret) + xerror("mptcpi_bytes_acked %" PRIu64 ", expect %" PRIu64, + s.last_sample.mptcpi_bytes_acked, ret2, + s.last_sample.mptcpi_bytes_acked - ret2); + } + + close(fd); +} + +static int xaccept(int s) +{ + int fd = accept(s, NULL, 0); + + if (fd < 0) + die_perror("accept"); + + return fd; +} + +static int server(int pipefd) +{ + int fd = -1, r; + + switch (pf) { + case AF_INET: + fd = sock_listen_mptcp("127.0.0.1", "15432"); + break; + case AF_INET6: + fd = sock_listen_mptcp("::1", "15432"); + break; + default: + xerror("Unknown pf %d\n", pf); + break; + } + + r = write(pipefd, "conn", 4); + 
/*
 * Exercise IP_TOS set/getsockopt on the MPTCP socket @fd:
 *  - a random TOS value (two low bits cleared) must read back unchanged
 *    with a reported length of 1 byte;
 *  - a zero-length getsockopt() must succeed and leave the length at 0;
 *  - a length of -1 must be rejected with EINVAL and left untouched.
 * Any deviation terminates the process.
 */
static void test_ip_tos_sockopt(int fd)
{
	uint8_t tos_in, tos_out;
	socklen_t s;
	int r;

	tos_in = rand() & 0xfc;
	r = setsockopt(fd, SOL_IP, IP_TOS, &tos_in, sizeof(tos_in));
	if (r != 0)
		die_perror("setsockopt IP_TOS");

	tos_out = 0;
	s = sizeof(tos_out);
	r = getsockopt(fd, SOL_IP, IP_TOS, &tos_out, &s);
	if (r != 0)
		die_perror("getsockopt IP_TOS");

	if (tos_in != tos_out)
		xerror("tos %x != %x socklen_t %d\n", tos_in, tos_out, (int)s);

	if (s != 1)
		xerror("tos should be 1 byte");

	s = 0;
	r = getsockopt(fd, SOL_IP, IP_TOS, &tos_out, &s);
	if (r != 0)
		die_perror("getsockopt IP_TOS 0");
	if (s != 0)
		xerror("expect socklen_t == 0");

	s = -1;
	r = getsockopt(fd, SOL_IP, IP_TOS, &tos_out, &s);
	/*
	 * Only "failed with EINVAL" is acceptable: an unexpected success or a
	 * failure with any other errno must both be reported.
	 */
	if (r != -1 || errno != EINVAL)
		die_perror("getsockopt IP_TOS did not indicate -EINVAL");
	if (s != (socklen_t)-1)
		xerror("expect socklen_t == -1");
}
/*
 * Fork one server and one client process and report their exit status.
 *
 * The server writes a 4-byte token on the pipe once its listening socket is
 * ready; the client is forked only after that token has been read and reuses
 * the read end of the pipe to wait for the server's go-ahead.
 *
 * Returns the server's status if it is non-zero, else the client's.
 */
int main(int argc, char *argv[])
{
	int e1, e2, wstatus;
	pid_t s, c, ret;
	int pipefds[2];
	/* receives the server's 4-byte "ready" token; content is not checked */
	char token[4];

	parse_opts(argc, argv);

	init_rng();

	e1 = pipe(pipefds);
	if (e1 < 0)
		die_perror("pipe");

	s = xfork();
	if (s == 0)
		return server(pipefds[1]);

	close(pipefds[1]);

	/* wait until server bound a socket */
	e1 = read(pipefds[0], token, sizeof(token));
	assert(e1 == (int)sizeof(token));

	c = xfork();
	if (c == 0)
		return client(pipefds[0]);

	close(pipefds[0]);

	ret = waitpid(s, &wstatus, 0);
	if (ret == -1)
		die_perror("waitpid");
	e1 = rcheck(wstatus, "server");
	ret = waitpid(c, &wstatus, 0);
	if (ret == -1)
		die_perror("waitpid");
	e2 = rcheck(wstatus, "client");

	return e1 ? e1 : e2;
}
"$(dirname "${0}")/mptcp_lib.sh" + +ret=0 +sin="" +sout="" +cin="" +cout="" +timeout_poll=30 +timeout_test=$((timeout_poll * 2 + 1)) +iptables="iptables" +ip6tables="ip6tables" + +ns1="" +ns2="" +ns_sbox="" + +usage() { + echo "Usage: $0 [ -i ] [ -h ]" + echo -e "\t-i: use 'ip mptcp' instead of 'pm_nl_ctl'" + echo -e "\t-h: help" +} + +while getopts "hi" option;do + case "$option" in + "h") + usage "$0" + exit ${KSFT_PASS} + ;; + "i") + mptcp_lib_set_ip_mptcp + ;; + "?") + usage "$0" + exit ${KSFT_FAIL} + ;; + esac +done + +add_mark_rules() +{ + local ns=$1 + local m=$2 + + local t + for t in ${iptables} ${ip6tables}; do + # just to debug: check we have multiple subflows connection requests + ip netns exec $ns $t -A OUTPUT -p tcp --syn -m mark --mark $m -j ACCEPT + + # RST packets might be handled by a internal dummy socket + ip netns exec $ns $t -A OUTPUT -p tcp --tcp-flags RST RST -m mark --mark 0 -j ACCEPT + + ip netns exec $ns $t -A OUTPUT -p tcp -m mark --mark $m -j ACCEPT + ip netns exec $ns $t -A OUTPUT -p tcp -m mark --mark 0 -j DROP + done +} + +init() +{ + mptcp_lib_ns_init ns1 ns2 ns_sbox + + local i + for i in $(seq 1 4); do + ip link add ns1eth$i netns "$ns1" type veth peer name ns2eth$i netns "$ns2" + ip -net "$ns1" addr add 10.0.$i.1/24 dev ns1eth$i + ip -net "$ns1" addr add dead:beef:$i::1/64 dev ns1eth$i nodad + ip -net "$ns1" link set ns1eth$i up + + ip -net "$ns2" addr add 10.0.$i.2/24 dev ns2eth$i + ip -net "$ns2" addr add dead:beef:$i::2/64 dev ns2eth$i nodad + ip -net "$ns2" link set ns2eth$i up + + # let $ns2 reach any $ns1 address from any interface + ip -net "$ns2" route add default via 10.0.$i.1 dev ns2eth$i metric 10$i + + mptcp_lib_pm_nl_add_endpoint "${ns1}" "10.0.${i}.1" flags signal + mptcp_lib_pm_nl_add_endpoint "${ns1}" "dead:beef:${i}::1" flags signal + + mptcp_lib_pm_nl_add_endpoint "${ns2}" "10.0.${i}.2" flags signal + mptcp_lib_pm_nl_add_endpoint "${ns2}" "dead:beef:${i}::2" flags signal + done + + mptcp_lib_pm_nl_set_limits 
# Run one mptcp_connect transfer between two namespaces, then verify the data
# received by the server and the packet marks.
#
# $1: listener (server) namespace
# $2: connector (client) namespace
# $3: protocol used by the client   (passed to 'mptcp_connect -s')
# $4: protocol used by the listener (passed to 'mptcp_connect -s')
# $5: address the client connects to
#
# Globals read: sin, sout, cin, cout, timeout_test, timeout_poll, KSFT_FAIL
# Globals written: ret (set to KSFT_FAIL when a peer exits with an error)
# Returns 0 when the transfer and the mark checks both pass, 1 otherwise.
do_transfer()
{
	local listener_ns="$1"
	local connector_ns="$2"
	local cl_proto="$3"
	local srv_proto="$4"
	local connect_addr="$5"

	local port=12001

	:> "$cout"
	:> "$sout"

	local mptcp_connect="./mptcp_connect -r 20"

	local local_addr ip
	if mptcp_lib_is_v6 "${connect_addr}"; then
		local_addr="::"
		ip=ipv6
	else
		local_addr="0.0.0.0"
		ip=ipv4
	fi

	# kept local so the list does not leak into the caller's scope
	local cmsg="TIMESTAMPNS"
	if mptcp_lib_kallsyms_has "mptcp_ioctl$"; then
		cmsg+=",TCPINQ"
	fi

	# $mptcp_connect holds a command plus options: split on purpose
	timeout "${timeout_test}" \
		ip netns exec "${listener_ns}" \
			$mptcp_connect -t "${timeout_poll}" -l -M 1 -p "${port}" -s "${srv_proto}" -c "${cmsg}" \
				"${local_addr}" < "$sin" > "$sout" &
	local spid=$!

	# give the listener some time to be ready
	sleep 1

	timeout "${timeout_test}" \
		ip netns exec "${connector_ns}" \
			$mptcp_connect -t "${timeout_poll}" -M 2 -p "${port}" -s "${cl_proto}" -c "${cmsg}" \
				"${connect_addr}" < "$cin" > "$cout" &

	local cpid=$!

	wait "${cpid}"
	local retc=$?
	wait "${spid}"
	local rets=$?

	print_title "Transfer ${ip:2}"
	if [ "${rets}" -ne 0 ] || [ "${retc}" -ne 0 ]; then
		mptcp_lib_pr_fail "client exit code $retc, server $rets"
		echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2
		ip netns exec "${listener_ns}" ss -Menita 1>&2 -o "sport = :$port"

		echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2
		ip netns exec "${connector_ns}" ss -Menita 1>&2 -o "dport = :$port"

		mptcp_lib_result_fail "transfer ${ip}"

		ret=${KSFT_FAIL}
		return 1
	fi
	# the temporary file names come from mktemp: keep each one as one word
	if ! mptcp_lib_check_transfer "$cin" "$sout" "file received by server"; then
		rets=1
	else
		mptcp_lib_pr_ok
	fi
	mptcp_lib_result_code "${rets}" "transfer ${ip}"

	print_title "Mark ${ip:2}"
	if [ "${local_addr}" = "::" ]; then
		check_mark "${listener_ns}" 6 || retc=1
		check_mark "${connector_ns}" 6 || retc=1
	else
		check_mark "${listener_ns}" 4 || retc=1
		check_mark "${connector_ns}" 4 || retc=1
	fi

	mptcp_lib_result_code "${retc}" "mark ${ip}"

	if [ "${retc}" -eq 0 ] && [ "${rets}" -eq 0 ]; then
		mptcp_lib_pr_ok
		return 0
	fi
	mptcp_lib_pr_fail

	return 1
}
+ + print_title "SOL_MPTCP sockopt v6" + if [ $lret -ne 0 ]; then + mptcp_lib_pr_fail + mptcp_lib_result_fail "sockopt v6" + ret=$lret + return + fi + mptcp_lib_pr_ok + mptcp_lib_result_pass "sockopt v6" +} + +run_tests() +{ + local listener_ns="$1" + local connector_ns="$2" + local connect_addr="$3" + local lret=0 + + do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} + + lret=$? + + if [ $lret -ne 0 ]; then + ret=$lret + return + fi +} + +do_tcpinq_test() +{ + print_title "TCP_INQ cmsg/ioctl $*" + ip netns exec "$ns_sbox" ./mptcp_inq "$@" + local lret=$? + if [ $lret -ne 0 ];then + ret=$lret + mptcp_lib_pr_fail + mptcp_lib_result_fail "TCP_INQ: $*" + return $lret + fi + + mptcp_lib_pr_ok + mptcp_lib_result_pass "TCP_INQ: $*" + return $lret +} + +do_tcpinq_tests() +{ + local lret=0 + + if ! mptcp_lib_kallsyms_has "mptcp_ioctl$"; then + mptcp_lib_pr_skip "TCP_INQ not supported" + mptcp_lib_result_skip "TCP_INQ" + return + fi + + local args + for args in "-t tcp" "-r tcp"; do + do_tcpinq_test $args + lret=$? + if [ $lret -ne 0 ] ; then + return $lret + fi + do_tcpinq_test -6 $args + lret=$? + if [ $lret -ne 0 ] ; then + return $lret + fi + done + + do_tcpinq_test -r tcp -t tcp + + return $? +} + +sin=$(mktemp) +sout=$(mktemp) +cin=$(mktemp) +cout=$(mktemp) +init +make_file "$cin" "client" 1 +make_file "$sin" "server" 1 +trap cleanup EXIT + +run_tests $ns1 $ns2 10.0.1.1 +run_tests $ns1 $ns2 dead:beef:1::1 + +do_mptcp_sockopt_tests +do_tcpinq_tests + +mptcp_lib_result_print_all_tap +exit $ret diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index 15f4f46ca3a9..2757378b1b13 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -1,130 +1,270 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ksft_skip=4 +. 
"$(dirname "${0}")/mptcp_lib.sh" + ret=0 usage() { - echo "Usage: $0 [ -h ]" + echo "Usage: $0 [ -i ] [ -h ]" + echo -e "\t-i: use 'ip mptcp' instead of 'pm_nl_ctl'" + echo -e "\t-h: help" } - +optstring=hi while getopts "$optstring" option;do case "$option" in "h") - usage $0 - exit 0 + usage "$0" + exit ${KSFT_PASS} + ;; + "i") + mptcp_lib_set_ip_mptcp ;; "?") - usage $0 - exit 1 + usage "$0" + exit ${KSFT_FAIL} ;; esac done -sec=$(date +%s) -rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) -ns1="ns1-$rndh" +ns1="" err=$(mktemp) -ret=0 +# This function is used in the cleanup trap +#shellcheck disable=SC2317 cleanup() { - rm -f $err - ip netns del $ns1 + rm -f "${err}" + mptcp_lib_ns_exit "${ns1}" } -ip -Version > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: Could not run test without ip tool" - exit $ksft_skip -fi +mptcp_lib_check_mptcp +mptcp_lib_check_tools ip trap cleanup EXIT -ip netns add $ns1 || exit $ksft_skip -ip -net $ns1 link set lo up -ip netns exec $ns1 sysctl -q net.mptcp.enabled=1 +mptcp_lib_ns_init ns1 + +format_limits() { + local accept="${1}" + local subflows="${2}" + + if mptcp_lib_is_ip_mptcp; then + # with a space at the end + printf "add_addr_accepted %d subflows %d \n" "${accept}" "${subflows}" + else + printf "accept %d\nsubflows %d\n" "${accept}" "${subflows}" + fi +} + +get_limits() { + if mptcp_lib_is_ip_mptcp; then + ip -n "${ns1}" mptcp limits + else + ip netns exec "${ns1}" ./pm_nl_ctl limits + fi +} + +format_endpoints() { + mptcp_lib_pm_nl_format_endpoints "${@}" +} + +get_endpoint() { + # shellcheck disable=SC2317 # invoked indirectly + mptcp_lib_pm_nl_get_endpoint "${ns1}" "${@}" +} + +change_address() { + local addr=${1} + local flags=${2} + + if mptcp_lib_is_ip_mptcp; then + ip -n "${ns1}" mptcp endpoint change "${addr}" "${flags}" + else + ip netns exec "${ns1}" ./pm_nl_ctl set "${addr}" flags "${flags}" + fi +} + +set_limits() +{ + mptcp_lib_pm_nl_set_limits "${ns1}" "${@}" +} + +add_endpoint() +{ + 
mptcp_lib_pm_nl_add_endpoint "${ns1}" "${@}" +} + +del_endpoint() +{ + mptcp_lib_pm_nl_del_endpoint "${ns1}" "${@}" +} + +flush_endpoint() +{ + mptcp_lib_pm_nl_flush_endpoint "${ns1}" +} + +show_endpoints() +{ + mptcp_lib_pm_nl_show_endpoints "${ns1}" +} + +change_endpoint() +{ + mptcp_lib_pm_nl_change_endpoint "${ns1}" "${@}" +} check() { local cmd="$1" local expected="$2" local msg="$3" - local out=`$cmd 2>$err` - local cmd_ret=$? - - printf "%-50s %s" "$msg" - if [ $cmd_ret -ne 0 ]; then - echo "[FAIL] command execution '$cmd' stderr " - cat $err - ret=1 - elif [ "$out" = "$expected" ]; then - echo "[ OK ]" - else - echo -n "[FAIL] " - echo "expected '$expected' got '$out'" - ret=1 + local rc=0 + + mptcp_lib_print_title "$msg" + mptcp_lib_check_output "${err}" "${cmd}" "${expected}" || rc=${?} + if [ ${rc} -eq 2 ]; then + mptcp_lib_result_fail "${msg} # error ${rc}" + ret=${KSFT_FAIL} + elif [ ${rc} -eq 0 ]; then + mptcp_lib_print_ok "[ OK ]" + mptcp_lib_result_pass "${msg}" + elif [ ${rc} -eq 1 ]; then + mptcp_lib_result_fail "${msg} # different output" + ret=${KSFT_FAIL} fi } -check "ip netns exec $ns1 ./pm_nl_ctl dump" "" "defaults addr list" -check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0 -subflows 0" "defaults limits" +check "show_endpoints" "" "defaults addr list" + +default_limits="$(get_limits)" +if mptcp_lib_expect_all_features; then + check "get_limits" "$(format_limits 0 2)" "defaults limits" +fi -ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.2 flags subflow dev lo -ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.3 flags signal,backup -check "ip netns exec $ns1 ./pm_nl_ctl get 1" "id 1 flags 10.0.1.1" "simple add/get addr" +add_endpoint 10.0.1.1 +add_endpoint 10.0.1.2 flags subflow dev lo +add_endpoint 10.0.1.3 flags signal,backup +check "get_endpoint 1" "$(format_endpoints "1,10.0.1.1")" "simple add/get addr" -check "ip netns exec $ns1 ./pm_nl_ctl dump" \ -"id 1 flags 10.0.1.1 -id 2 flags subflow dev lo 
10.0.1.2 -id 3 flags signal,backup 10.0.1.3" "dump addrs" +check "show_endpoints" \ + "$(format_endpoints "1,10.0.1.1" \ + "2,10.0.1.2,subflow,lo" \ + "3,10.0.1.3,signal backup")" "dump addrs" -ip netns exec $ns1 ./pm_nl_ctl del 2 -check "ip netns exec $ns1 ./pm_nl_ctl get 2" "" "simple del addr" -check "ip netns exec $ns1 ./pm_nl_ctl dump" \ -"id 1 flags 10.0.1.1 -id 3 flags signal,backup 10.0.1.3" "dump addrs after del" +del_endpoint 2 +check "get_endpoint 2" "" "simple del addr" +check "show_endpoints" \ + "$(format_endpoints "1,10.0.1.1" \ + "3,10.0.1.3,signal backup")" "dump addrs after del" -ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.3 -check "ip netns exec $ns1 ./pm_nl_ctl get 4" "" "duplicate addr" +add_endpoint 10.0.1.3 2>/dev/null +check "get_endpoint 4" "" "duplicate addr" -ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.4 id 10 flags signal -check "ip netns exec $ns1 ./pm_nl_ctl get 4" "id 4 flags signal 10.0.1.4" "id addr increment" +add_endpoint 10.0.1.4 flags signal +check "get_endpoint 4" "$(format_endpoints "4,10.0.1.4,signal")" "id addr increment" -for i in `seq 5 9`; do - ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.$i flags signal >/dev/null 2>&1 +for i in $(seq 5 9); do + add_endpoint "10.0.1.${i}" flags signal >/dev/null 2>&1 done -check "ip netns exec $ns1 ./pm_nl_ctl get 9" "id 9 flags signal 10.0.1.9" "hard addr limit" -check "ip netns exec $ns1 ./pm_nl_ctl get 10" "" "above hard addr limit" +check "get_endpoint 9" "$(format_endpoints "9,10.0.1.9,signal")" "hard addr limit" +check "get_endpoint 10" "" "above hard addr limit" -for i in `seq 9 256`; do - ip netns exec $ns1 ./pm_nl_ctl del $i - ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.9 +del_endpoint 9 +for i in $(seq 10 255); do + add_endpoint 10.0.0.9 id "${i}" + del_endpoint "${i}" done -check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags 10.0.1.1 -id 3 flags signal,backup 10.0.1.3 -id 4 flags signal 10.0.1.4 -id 5 flags signal 10.0.1.5 -id 6 flags signal 10.0.1.6 -id 7 flags signal 10.0.1.7 -id 
8 flags signal 10.0.1.8" "id limit" +check "show_endpoints" \ + "$(format_endpoints "1,10.0.1.1" \ + "3,10.0.1.3,signal backup" \ + "4,10.0.1.4,signal" \ + "5,10.0.1.5,signal" \ + "6,10.0.1.6,signal" \ + "7,10.0.1.7,signal" \ + "8,10.0.1.8,signal")" "id limit" + +flush_endpoint +check "show_endpoints" "" "flush addrs" + +set_limits 9 1 2>/dev/null +check "get_limits" "${default_limits}" "rcv addrs above hard limit" -ip netns exec $ns1 ./pm_nl_ctl flush -check "ip netns exec $ns1 ./pm_nl_ctl dump" "" "flush addrs" +set_limits 1 9 2>/dev/null +check "get_limits" "${default_limits}" "subflows above hard limit" -ip netns exec $ns1 ./pm_nl_ctl limits 9 1 -check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0 -subflows 0" "rcv addrs above hard limit" +set_limits 8 8 +check "get_limits" "$(format_limits 8 8)" "set limits" -ip netns exec $ns1 ./pm_nl_ctl limits 1 9 -check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0 -subflows 0" "subflows above hard limit" +flush_endpoint +add_endpoint 10.0.1.1 +add_endpoint 10.0.1.2 +add_endpoint 10.0.1.3 id 100 +add_endpoint 10.0.1.4 +add_endpoint 10.0.1.5 id 254 +add_endpoint 10.0.1.6 +add_endpoint 10.0.1.7 +add_endpoint 10.0.1.8 +check "show_endpoints" \ + "$(format_endpoints "1,10.0.1.1" \ + "2,10.0.1.2" \ + "3,10.0.1.7" \ + "4,10.0.1.8" \ + "100,10.0.1.3" \ + "101,10.0.1.4" \ + "254,10.0.1.5" \ + "255,10.0.1.6")" "set ids" -ip netns exec $ns1 ./pm_nl_ctl limits 8 8 -check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 8 -subflows 8" "set limits" +flush_endpoint +add_endpoint 10.0.0.1 +add_endpoint 10.0.0.2 id 254 +add_endpoint 10.0.0.3 +add_endpoint 10.0.0.4 +add_endpoint 10.0.0.5 id 253 +add_endpoint 10.0.0.6 +add_endpoint 10.0.0.7 +add_endpoint 10.0.0.8 +check "show_endpoints" \ + "$(format_endpoints "1,10.0.0.1" \ + "2,10.0.0.4" \ + "3,10.0.0.6" \ + "4,10.0.0.7" \ + "5,10.0.0.8" \ + "253,10.0.0.5" \ + "254,10.0.0.2" \ + "255,10.0.0.3")" "wrap-around ids" + +flush_endpoint +add_endpoint 10.0.1.1 flags subflow 
+change_address 10.0.1.1 backup +check "show_endpoints" "$(format_endpoints "1,10.0.1.1,subflow backup")" \ + "set flags (backup)" +change_address 10.0.1.1 nobackup +check "show_endpoints" "$(format_endpoints "1,10.0.1.1,subflow")" \ + " (nobackup)" + +# fullmesh support has been added later +change_endpoint 1 fullmesh 2>/dev/null +if show_endpoints | grep -q "fullmesh" || + mptcp_lib_expect_all_features; then + check "show_endpoints" "$(format_endpoints "1,10.0.1.1,subflow fullmesh")" \ + " (fullmesh)" + change_endpoint 1 nofullmesh + check "show_endpoints" "$(format_endpoints "1,10.0.1.1,subflow")" \ + " (nofullmesh)" + change_endpoint 1 backup,fullmesh + check "show_endpoints" "$(format_endpoints "1,10.0.1.1,subflow backup fullmesh")" \ + " (backup,fullmesh)" +else + for st in fullmesh nofullmesh backup,fullmesh; do + st=" (${st})" + mptcp_lib_print_title "${st}" + mptcp_lib_pr_skip + mptcp_lib_result_skip "${st}" + done +fi +mptcp_lib_result_print_all_tap exit $ret diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c index b24a2f17d415..7ad5a59adff2 100644 --- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c +++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c @@ -6,6 +6,7 @@ #include <stdlib.h> #include <string.h> #include <unistd.h> +#include <limits.h> #include <sys/socket.h> #include <sys/types.h> @@ -21,16 +22,29 @@ #ifndef MPTCP_PM_NAME #define MPTCP_PM_NAME "mptcp_pm" #endif +#ifndef MPTCP_PM_EVENTS +#define MPTCP_PM_EVENTS "mptcp_pm_events" +#endif +#ifndef IPPROTO_MPTCP +#define IPPROTO_MPTCP 262 +#endif static void syntax(char *argv[]) { - fprintf(stderr, "%s add|get|del|flush|dump|accept [<args>]\n", argv[0]); - fprintf(stderr, "\tadd [flags signal|subflow|backup] [id <nr>] [dev <name>] <ip>\n"); - fprintf(stderr, "\tdel <id>\n"); + fprintf(stderr, "%s add|ann|rem|csf|dsf|get|set|del|flush|dump|events|listen|accept [<args>]\n", argv[0]); + fprintf(stderr, "\tadd [flags 
signal|subflow|backup|fullmesh] [id <nr>] [dev <name>] <ip>\n"); + fprintf(stderr, "\tann <local-ip> id <local-id> token <token> [port <local-port>] [dev <name>]\n"); + fprintf(stderr, "\trem id <local-id> token <token>\n"); + fprintf(stderr, "\tcsf lip <local-ip> lid <local-id> rip <remote-ip> rport <remote-port> token <token>\n"); + fprintf(stderr, "\tdsf lip <local-ip> lport <local-port> rip <remote-ip> rport <remote-port> token <token>\n"); + fprintf(stderr, "\tdel <id> [<ip>]\n"); fprintf(stderr, "\tget <id>\n"); + fprintf(stderr, "\tset [<ip>] [id <nr>] flags [no]backup|[no]fullmesh [port <nr>] [token <token>] [rip <ip>] [rport <port>]\n"); fprintf(stderr, "\tflush\n"); fprintf(stderr, "\tdump\n"); fprintf(stderr, "\tlimits [<rcv addr max> <subflow max>]\n"); + fprintf(stderr, "\tevents\n"); + fprintf(stderr, "\tlisten <local-ip> <local-port>\n"); exit(0); } @@ -52,20 +66,25 @@ static int init_genl_req(char *data, int family, int cmd, int version) return off; } -static void nl_error(struct nlmsghdr *nh) +static int nl_error(struct nlmsghdr *nh) { struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(nh); int len = nh->nlmsg_len - sizeof(*nh); uint32_t off; - if (len < sizeof(struct nlmsgerr)) + if (len < sizeof(struct nlmsgerr)) { error(1, 0, "netlink error message truncated %d min %ld", len, sizeof(struct nlmsgerr)); + return -1; + } - if (!err->error) { + if (err->error) { /* check messages from kernel */ struct rtattr *attrs = (struct rtattr *)NLMSG_DATA(nh); + fprintf(stderr, "netlink error %d (%s)\n", + err->error, strerror(-err->error)); + while (RTA_OK(attrs, len)) { if (attrs->rta_type == NLMSGERR_ATTR_MSG) fprintf(stderr, "netlink ext ack msg: %s\n", @@ -77,12 +96,115 @@ static void nl_error(struct nlmsghdr *nh) } attrs = RTA_NEXT(attrs, len); } - } else { - fprintf(stderr, "netlink error %d", err->error); + return -1; } + + return 0; } -/* do a netlink command and, if max > 0, fetch the reply */ +static int capture_events(int fd, int event_group) +{ 
+ u_int8_t buffer[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + + NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1024]; + struct genlmsghdr *ghdr; + struct rtattr *attrs; + struct nlmsghdr *nh; + int ret = 0; + int res_len; + int msg_len; + fd_set rfds; + + if (setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, + &event_group, sizeof(event_group)) < 0) + error(1, errno, "could not join the " MPTCP_PM_EVENTS " mcast group"); + + do { + FD_ZERO(&rfds); + FD_SET(fd, &rfds); + res_len = NLMSG_ALIGN(sizeof(struct nlmsghdr)) + + NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1024; + + ret = select(FD_SETSIZE, &rfds, NULL, NULL, NULL); + + if (ret < 0) + error(1, ret, "error in select() on NL socket"); + + res_len = recv(fd, buffer, res_len, 0); + if (res_len < 0) + error(1, res_len, "error on recv() from NL socket"); + + nh = (struct nlmsghdr *)buffer; + + for (; NLMSG_OK(nh, res_len); nh = NLMSG_NEXT(nh, res_len)) { + if (nh->nlmsg_type == NLMSG_ERROR) + error(1, NLMSG_ERROR, "received invalid NL message"); + + ghdr = (struct genlmsghdr *)NLMSG_DATA(nh); + + if (ghdr->cmd == 0) + continue; + + fprintf(stderr, "type:%d", ghdr->cmd); + + msg_len = nh->nlmsg_len - NLMSG_LENGTH(GENL_HDRLEN); + + attrs = (struct rtattr *) ((char *) ghdr + GENL_HDRLEN); + while (RTA_OK(attrs, msg_len)) { + if (attrs->rta_type == MPTCP_ATTR_TOKEN) + fprintf(stderr, ",token:%u", *(__u32 *)RTA_DATA(attrs)); + else if (attrs->rta_type == MPTCP_ATTR_FAMILY) + fprintf(stderr, ",family:%u", *(__u16 *)RTA_DATA(attrs)); + else if (attrs->rta_type == MPTCP_ATTR_LOC_ID) + fprintf(stderr, ",loc_id:%u", *(__u8 *)RTA_DATA(attrs)); + else if (attrs->rta_type == MPTCP_ATTR_REM_ID) + fprintf(stderr, ",rem_id:%u", *(__u8 *)RTA_DATA(attrs)); + else if (attrs->rta_type == MPTCP_ATTR_SADDR4) { + u_int32_t saddr4 = ntohl(*(__u32 *)RTA_DATA(attrs)); + + fprintf(stderr, ",saddr4:%u.%u.%u.%u", saddr4 >> 24, + (saddr4 >> 16) & 0xFF, (saddr4 >> 8) & 0xFF, + (saddr4 & 0xFF)); + } else if (attrs->rta_type == MPTCP_ATTR_SADDR6) { + char 
buf[INET6_ADDRSTRLEN]; + + if (inet_ntop(AF_INET6, RTA_DATA(attrs), buf, + sizeof(buf)) != NULL) + fprintf(stderr, ",saddr6:%s", buf); + } else if (attrs->rta_type == MPTCP_ATTR_DADDR4) { + u_int32_t daddr4 = ntohl(*(__u32 *)RTA_DATA(attrs)); + + fprintf(stderr, ",daddr4:%u.%u.%u.%u", daddr4 >> 24, + (daddr4 >> 16) & 0xFF, (daddr4 >> 8) & 0xFF, + (daddr4 & 0xFF)); + } else if (attrs->rta_type == MPTCP_ATTR_DADDR6) { + char buf[INET6_ADDRSTRLEN]; + + if (inet_ntop(AF_INET6, RTA_DATA(attrs), buf, + sizeof(buf)) != NULL) + fprintf(stderr, ",daddr6:%s", buf); + } else if (attrs->rta_type == MPTCP_ATTR_SPORT) + fprintf(stderr, ",sport:%u", + ntohs(*(__u16 *)RTA_DATA(attrs))); + else if (attrs->rta_type == MPTCP_ATTR_DPORT) + fprintf(stderr, ",dport:%u", + ntohs(*(__u16 *)RTA_DATA(attrs))); + else if (attrs->rta_type == MPTCP_ATTR_BACKUP) + fprintf(stderr, ",backup:%u", *(__u8 *)RTA_DATA(attrs)); + else if (attrs->rta_type == MPTCP_ATTR_ERROR) + fprintf(stderr, ",error:%u", *(__u8 *)RTA_DATA(attrs)); + else if (attrs->rta_type == MPTCP_ATTR_SERVER_SIDE) + fprintf(stderr, ",server_side:%u", *(__u8 *)RTA_DATA(attrs)); + + attrs = RTA_NEXT(attrs, msg_len); + } + } + fprintf(stderr, "\n"); + } while (1); + + return 0; +} + +/* do a netlink command and, if max > 0, fetch the reply ; nh's size >1024B */ static int do_nl_req(int fd, struct nlmsghdr *nh, int len, int max) { struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK }; @@ -91,12 +213,16 @@ static int do_nl_req(int fd, struct nlmsghdr *nh, int len, int max) int rem, ret; int err = 0; + /* If no expected answer, ask for an ACK to look for errors if any */ + if (max == 0) { + nh->nlmsg_flags |= NLM_F_ACK; + max = 1024; + } + nh->nlmsg_len = len; ret = sendto(fd, data, len, 0, (void *)&nladdr, sizeof(nladdr)); if (ret != len) error(1, errno, "send netlink: %uB != %uB\n", ret, len); - if (max == 0) - return 0; addr_len = sizeof(nladdr); rem = ret = recvfrom(fd, data, max, 0, (void *)&nladdr, &addr_len); @@ -105,21 +231,29 
@@ static int do_nl_req(int fd, struct nlmsghdr *nh, int len, int max) /* Beware: the NLMSG_NEXT macro updates the 'rem' argument */ for (; NLMSG_OK(nh, rem); nh = NLMSG_NEXT(nh, rem)) { - if (nh->nlmsg_type == NLMSG_ERROR) { - nl_error(nh); + if (nh->nlmsg_type == NLMSG_DONE) + break; + + if (nh->nlmsg_type == NLMSG_ERROR && nl_error(nh)) err = 1; - } } if (err) error(1, 0, "bailing out due to netlink error[s]"); return ret; } -static int genl_parse_getfamily(struct nlmsghdr *nlh) +static int genl_parse_getfamily(struct nlmsghdr *nlh, int *pm_family, + int *events_mcast_grp) { struct genlmsghdr *ghdr = NLMSG_DATA(nlh); int len = nlh->nlmsg_len; struct rtattr *attrs; + struct rtattr *grps; + struct rtattr *grp; + int got_events_grp; + int got_family; + int grps_len; + int grp_len; if (nlh->nlmsg_type != GENL_ID_CTRL) error(1, errno, "Not a controller message, len=%d type=0x%x\n", @@ -134,9 +268,42 @@ static int genl_parse_getfamily(struct nlmsghdr *nlh) error(1, errno, "Unknown controller command %d\n", ghdr->cmd); attrs = (struct rtattr *) ((char *) ghdr + GENL_HDRLEN); + got_family = 0; + got_events_grp = 0; + while (RTA_OK(attrs, len)) { - if (attrs->rta_type == CTRL_ATTR_FAMILY_ID) - return *(__u16 *)RTA_DATA(attrs); + if (attrs->rta_type == CTRL_ATTR_FAMILY_ID) { + *pm_family = *(__u16 *)RTA_DATA(attrs); + got_family = 1; + } else if (attrs->rta_type == CTRL_ATTR_MCAST_GROUPS) { + grps = RTA_DATA(attrs); + grps_len = RTA_PAYLOAD(attrs); + + while (RTA_OK(grps, grps_len)) { + grp = RTA_DATA(grps); + grp_len = RTA_PAYLOAD(grps); + got_events_grp = 0; + + while (RTA_OK(grp, grp_len)) { + if (grp->rta_type == CTRL_ATTR_MCAST_GRP_ID) + *events_mcast_grp = *(__u32 *)RTA_DATA(grp); + else if (grp->rta_type == CTRL_ATTR_MCAST_GRP_NAME && + !strcmp(RTA_DATA(grp), MPTCP_PM_EVENTS)) + got_events_grp = 1; + + grp = RTA_NEXT(grp, grp_len); + } + + if (got_events_grp) + break; + + grps = RTA_NEXT(grps, grps_len); + } + } + + if (got_family && got_events_grp) + return 0; + 
attrs = RTA_NEXT(attrs, len); } @@ -144,7 +311,7 @@ static int genl_parse_getfamily(struct nlmsghdr *nlh) return -1; } -static int resolve_mptcp_pm_netlink(int fd) +static int resolve_mptcp_pm_netlink(int fd, int *pm_family, int *events_mcast_grp) { char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + NLMSG_ALIGN(sizeof(struct genlmsghdr)) + @@ -166,7 +333,429 @@ static int resolve_mptcp_pm_netlink(int fd) off += NLMSG_ALIGN(rta->rta_len); do_nl_req(fd, nh, off, sizeof(data)); - return genl_parse_getfamily((void *)data); + return genl_parse_getfamily((void *)data, pm_family, events_mcast_grp); +} + +int dsf(int fd, int pm_family, int argc, char *argv[]) +{ + char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + + NLMSG_ALIGN(sizeof(struct genlmsghdr)) + + 1024]; + struct rtattr *rta, *addr; + u_int16_t family, port; + struct nlmsghdr *nh; + u_int32_t token; + int addr_start; + int off = 0; + int arg; + + const char *params[5]; + + memset(params, 0, 5 * sizeof(const char *)); + + memset(data, 0, sizeof(data)); + nh = (void *)data; + off = init_genl_req(data, pm_family, MPTCP_PM_CMD_SUBFLOW_DESTROY, + MPTCP_PM_VER); + + if (argc < 12) + syntax(argv); + + /* Params recorded in this order: + * <local-ip>, <local-port>, <remote-ip>, <remote-port>, <token> + */ + for (arg = 2; arg < argc; arg++) { + if (!strcmp(argv[arg], "lip")) { + if (++arg >= argc) + error(1, 0, " missing local IP"); + + params[0] = argv[arg]; + } else if (!strcmp(argv[arg], "lport")) { + if (++arg >= argc) + error(1, 0, " missing local port"); + + params[1] = argv[arg]; + } else if (!strcmp(argv[arg], "rip")) { + if (++arg >= argc) + error(1, 0, " missing remote IP"); + + params[2] = argv[arg]; + } else if (!strcmp(argv[arg], "rport")) { + if (++arg >= argc) + error(1, 0, " missing remote port"); + + params[3] = argv[arg]; + } else if (!strcmp(argv[arg], "token")) { + if (++arg >= argc) + error(1, 0, " missing token"); + + params[4] = argv[arg]; + } else + error(1, 0, "unknown keyword %s", argv[arg]); + } + 
+ for (arg = 0; arg < 4; arg = arg + 2) { + /* addr header */ + addr_start = off; + addr = (void *)(data + off); + addr->rta_type = NLA_F_NESTED | + ((arg == 0) ? MPTCP_PM_ATTR_ADDR : MPTCP_PM_ATTR_ADDR_REMOTE); + addr->rta_len = RTA_LENGTH(0); + off += NLMSG_ALIGN(addr->rta_len); + + /* addr data */ + rta = (void *)(data + off); + if (inet_pton(AF_INET, params[arg], RTA_DATA(rta))) { + family = AF_INET; + rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4; + rta->rta_len = RTA_LENGTH(4); + } else if (inet_pton(AF_INET6, params[arg], RTA_DATA(rta))) { + family = AF_INET6; + rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6; + rta->rta_len = RTA_LENGTH(16); + } else + error(1, errno, "can't parse ip %s", params[arg]); + off += NLMSG_ALIGN(rta->rta_len); + + /* family */ + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY; + rta->rta_len = RTA_LENGTH(2); + memcpy(RTA_DATA(rta), &family, 2); + off += NLMSG_ALIGN(rta->rta_len); + + /* port */ + port = atoi(params[arg + 1]); + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_PORT; + rta->rta_len = RTA_LENGTH(2); + memcpy(RTA_DATA(rta), &port, 2); + off += NLMSG_ALIGN(rta->rta_len); + + addr->rta_len = off - addr_start; + } + + /* token */ + token = strtoul(params[4], NULL, 10); + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ATTR_TOKEN; + rta->rta_len = RTA_LENGTH(4); + memcpy(RTA_DATA(rta), &token, 4); + off += NLMSG_ALIGN(rta->rta_len); + + do_nl_req(fd, nh, off, 0); + + return 0; +} + +int csf(int fd, int pm_family, int argc, char *argv[]) +{ + char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + + NLMSG_ALIGN(sizeof(struct genlmsghdr)) + + 1024]; + u_int32_t flags = MPTCP_PM_ADDR_FLAG_SUBFLOW; + const char *params[5]; + struct nlmsghdr *nh; + struct rtattr *addr; + struct rtattr *rta; + u_int16_t family; + u_int32_t token; + u_int16_t port; + int addr_start; + u_int8_t id; + int off = 0; + int arg; + + memset(params, 0, 5 * sizeof(const char *)); + + memset(data, 0, sizeof(data)); + nh = 
(void *)data; + off = init_genl_req(data, pm_family, MPTCP_PM_CMD_SUBFLOW_CREATE, + MPTCP_PM_VER); + + if (argc < 12) + syntax(argv); + + /* Params recorded in this order: + * <local-ip>, <local-id>, <remote-ip>, <remote-port>, <token> + */ + for (arg = 2; arg < argc; arg++) { + if (!strcmp(argv[arg], "lip")) { + if (++arg >= argc) + error(1, 0, " missing local IP"); + + params[0] = argv[arg]; + } else if (!strcmp(argv[arg], "lid")) { + if (++arg >= argc) + error(1, 0, " missing local id"); + + params[1] = argv[arg]; + } else if (!strcmp(argv[arg], "rip")) { + if (++arg >= argc) + error(1, 0, " missing remote ip"); + + params[2] = argv[arg]; + } else if (!strcmp(argv[arg], "rport")) { + if (++arg >= argc) + error(1, 0, " missing remote port"); + + params[3] = argv[arg]; + } else if (!strcmp(argv[arg], "token")) { + if (++arg >= argc) + error(1, 0, " missing token"); + + params[4] = argv[arg]; + } else + error(1, 0, "unknown param %s", argv[arg]); + } + + for (arg = 0; arg < 4; arg = arg + 2) { + /* addr header */ + addr_start = off; + addr = (void *)(data + off); + addr->rta_type = NLA_F_NESTED | + ((arg == 0) ? 
MPTCP_PM_ATTR_ADDR : MPTCP_PM_ATTR_ADDR_REMOTE); + addr->rta_len = RTA_LENGTH(0); + off += NLMSG_ALIGN(addr->rta_len); + + /* addr data */ + rta = (void *)(data + off); + if (inet_pton(AF_INET, params[arg], RTA_DATA(rta))) { + family = AF_INET; + rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4; + rta->rta_len = RTA_LENGTH(4); + } else if (inet_pton(AF_INET6, params[arg], RTA_DATA(rta))) { + family = AF_INET6; + rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6; + rta->rta_len = RTA_LENGTH(16); + } else + error(1, errno, "can't parse ip %s", params[arg]); + off += NLMSG_ALIGN(rta->rta_len); + + /* family */ + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY; + rta->rta_len = RTA_LENGTH(2); + memcpy(RTA_DATA(rta), &family, 2); + off += NLMSG_ALIGN(rta->rta_len); + + if (arg == 2) { + /* port */ + port = atoi(params[arg + 1]); + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_PORT; + rta->rta_len = RTA_LENGTH(2); + memcpy(RTA_DATA(rta), &port, 2); + off += NLMSG_ALIGN(rta->rta_len); + } + + if (arg == 0) { + /* id */ + id = atoi(params[arg + 1]); + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_ID; + rta->rta_len = RTA_LENGTH(1); + memcpy(RTA_DATA(rta), &id, 1); + off += NLMSG_ALIGN(rta->rta_len); + } + + /* addr flags */ + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_FLAGS; + rta->rta_len = RTA_LENGTH(4); + memcpy(RTA_DATA(rta), &flags, 4); + off += NLMSG_ALIGN(rta->rta_len); + + addr->rta_len = off - addr_start; + } + + /* token */ + token = strtoul(params[4], NULL, 10); + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ATTR_TOKEN; + rta->rta_len = RTA_LENGTH(4); + memcpy(RTA_DATA(rta), &token, 4); + off += NLMSG_ALIGN(rta->rta_len); + + do_nl_req(fd, nh, off, 0); + + return 0; +} + +int remove_addr(int fd, int pm_family, int argc, char *argv[]) +{ + char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + + NLMSG_ALIGN(sizeof(struct genlmsghdr)) + + 1024]; + struct nlmsghdr *nh; + struct rtattr *rta; + 
u_int32_t token; + u_int8_t id; + int off = 0; + int arg; + + memset(data, 0, sizeof(data)); + nh = (void *)data; + off = init_genl_req(data, pm_family, MPTCP_PM_CMD_REMOVE, + MPTCP_PM_VER); + + if (argc < 6) + syntax(argv); + + for (arg = 2; arg < argc; arg++) { + if (!strcmp(argv[arg], "id")) { + if (++arg >= argc) + error(1, 0, " missing id value"); + + id = atoi(argv[arg]); + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ATTR_LOC_ID; + rta->rta_len = RTA_LENGTH(1); + memcpy(RTA_DATA(rta), &id, 1); + off += NLMSG_ALIGN(rta->rta_len); + } else if (!strcmp(argv[arg], "token")) { + if (++arg >= argc) + error(1, 0, " missing token value"); + + token = strtoul(argv[arg], NULL, 10); + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ATTR_TOKEN; + rta->rta_len = RTA_LENGTH(4); + memcpy(RTA_DATA(rta), &token, 4); + off += NLMSG_ALIGN(rta->rta_len); + } else + error(1, 0, "unknown keyword %s", argv[arg]); + } + + do_nl_req(fd, nh, off, 0); + return 0; +} + +int announce_addr(int fd, int pm_family, int argc, char *argv[]) +{ + char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + + NLMSG_ALIGN(sizeof(struct genlmsghdr)) + + 1024]; + u_int32_t flags = MPTCP_PM_ADDR_FLAG_SIGNAL; + u_int32_t token = UINT_MAX; + struct rtattr *rta, *addr; + u_int32_t id = UINT_MAX; + struct nlmsghdr *nh; + u_int16_t family; + int addr_start; + int off = 0; + int arg; + + memset(data, 0, sizeof(data)); + nh = (void *)data; + off = init_genl_req(data, pm_family, MPTCP_PM_CMD_ANNOUNCE, + MPTCP_PM_VER); + + if (argc < 7) + syntax(argv); + + /* local-ip header */ + addr_start = off; + addr = (void *)(data + off); + addr->rta_type = NLA_F_NESTED | MPTCP_PM_ATTR_ADDR; + addr->rta_len = RTA_LENGTH(0); + off += NLMSG_ALIGN(addr->rta_len); + + /* local-ip data */ + /* record addr type */ + rta = (void *)(data + off); + if (inet_pton(AF_INET, argv[2], RTA_DATA(rta))) { + family = AF_INET; + rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4; + rta->rta_len = RTA_LENGTH(4); + } else if 
(inet_pton(AF_INET6, argv[2], RTA_DATA(rta))) { + family = AF_INET6; + rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6; + rta->rta_len = RTA_LENGTH(16); + } else + error(1, errno, "can't parse ip %s", argv[2]); + off += NLMSG_ALIGN(rta->rta_len); + + /* addr family */ + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY; + rta->rta_len = RTA_LENGTH(2); + memcpy(RTA_DATA(rta), &family, 2); + off += NLMSG_ALIGN(rta->rta_len); + + for (arg = 3; arg < argc; arg++) { + if (!strcmp(argv[arg], "id")) { + /* local-id */ + if (++arg >= argc) + error(1, 0, " missing id value"); + + id = atoi(argv[arg]); + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_ID; + rta->rta_len = RTA_LENGTH(1); + memcpy(RTA_DATA(rta), &id, 1); + off += NLMSG_ALIGN(rta->rta_len); + } else if (!strcmp(argv[arg], "dev")) { + /* for the if_index */ + int32_t ifindex; + + if (++arg >= argc) + error(1, 0, " missing dev name"); + + ifindex = if_nametoindex(argv[arg]); + if (!ifindex) + error(1, errno, "unknown device %s", argv[arg]); + + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_IF_IDX; + rta->rta_len = RTA_LENGTH(4); + memcpy(RTA_DATA(rta), &ifindex, 4); + off += NLMSG_ALIGN(rta->rta_len); + } else if (!strcmp(argv[arg], "port")) { + /* local-port (optional) */ + u_int16_t port; + + if (++arg >= argc) + error(1, 0, " missing port value"); + + port = atoi(argv[arg]); + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_PORT; + rta->rta_len = RTA_LENGTH(2); + memcpy(RTA_DATA(rta), &port, 2); + off += NLMSG_ALIGN(rta->rta_len); + } else if (!strcmp(argv[arg], "token")) { + /* MPTCP connection token */ + if (++arg >= argc) + error(1, 0, " missing token value"); + + token = strtoul(argv[arg], NULL, 10); + } else + error(1, 0, "unknown keyword %s", argv[arg]); + } + + /* addr flags */ + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_FLAGS; + rta->rta_len = RTA_LENGTH(4); + memcpy(RTA_DATA(rta), &flags, 4); + off += 
NLMSG_ALIGN(rta->rta_len); + + addr->rta_len = off - addr_start; + + if (id == UINT_MAX || token == UINT_MAX) + error(1, 0, " missing mandatory inputs"); + + /* token */ + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ATTR_TOKEN; + rta->rta_len = RTA_LENGTH(4); + memcpy(RTA_DATA(rta), &token, 4); + off += NLMSG_ALIGN(rta->rta_len); + + do_nl_req(fd, nh, off, 0); + + return 0; } int add_addr(int fd, int pm_family, int argc, char *argv[]) @@ -176,8 +765,8 @@ int add_addr(int fd, int pm_family, int argc, char *argv[]) 1024]; struct rtattr *rta, *nest; struct nlmsghdr *nh; + u_int32_t flags = 0; u_int16_t family; - u_int32_t flags; int nest_start; u_int8_t id; int off = 0; @@ -223,7 +812,6 @@ int add_addr(int fd, int pm_family, int argc, char *argv[]) char *tok, *str; /* flags */ - flags = 0; if (++arg >= argc) error(1, 0, " missing flags value"); @@ -236,11 +824,18 @@ int add_addr(int fd, int pm_family, int argc, char *argv[]) flags |= MPTCP_PM_ADDR_FLAG_SIGNAL; else if (!strcmp(tok, "backup")) flags |= MPTCP_PM_ADDR_FLAG_BACKUP; + else if (!strcmp(tok, "fullmesh")) + flags |= MPTCP_PM_ADDR_FLAG_FULLMESH; else error(1, errno, "unknown flag %s", argv[arg]); } + if (flags & MPTCP_PM_ADDR_FLAG_SIGNAL && + flags & MPTCP_PM_ADDR_FLAG_FULLMESH) { + error(1, errno, "error flag fullmesh"); + } + rta = (void *)(data + off); rta->rta_type = MPTCP_PM_ADDR_ATTR_FLAGS; rta->rta_len = RTA_LENGTH(4); @@ -271,6 +866,20 @@ int add_addr(int fd, int pm_family, int argc, char *argv[]) rta->rta_len = RTA_LENGTH(4); memcpy(RTA_DATA(rta), &ifindex, 4); off += NLMSG_ALIGN(rta->rta_len); + } else if (!strcmp(argv[arg], "port")) { + u_int16_t port; + + if (++arg >= argc) + error(1, 0, " missing port value"); + if (!(flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) + error(1, 0, " flags must be signal when using port"); + + port = atoi(argv[arg]); + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_PORT; + rta->rta_len = RTA_LENGTH(2); + memcpy(RTA_DATA(rta), &port, 2); + off += 
NLMSG_ALIGN(rta->rta_len); } else error(1, 0, "unknown keyword %s", argv[arg]); } @@ -287,6 +896,7 @@ int del_addr(int fd, int pm_family, int argc, char *argv[]) 1024]; struct rtattr *rta, *nest; struct nlmsghdr *nh; + u_int16_t family; int nest_start; u_int8_t id; int off = 0; @@ -296,11 +906,14 @@ int del_addr(int fd, int pm_family, int argc, char *argv[]) off = init_genl_req(data, pm_family, MPTCP_PM_CMD_DEL_ADDR, MPTCP_PM_VER); - /* the only argument is the address id */ - if (argc != 3) + /* the only argument is the address id (nonzero) */ + if (argc != 3 && argc != 4) syntax(argv); id = atoi(argv[2]); + /* zero id with the IP address */ + if (!id && argc != 4) + syntax(argv); nest_start = off; nest = (void *)(data + off); @@ -314,6 +927,30 @@ int del_addr(int fd, int pm_family, int argc, char *argv[]) rta->rta_len = RTA_LENGTH(1); memcpy(RTA_DATA(rta), &id, 1); off += NLMSG_ALIGN(rta->rta_len); + + if (!id) { + /* addr data */ + rta = (void *)(data + off); + if (inet_pton(AF_INET, argv[3], RTA_DATA(rta))) { + family = AF_INET; + rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4; + rta->rta_len = RTA_LENGTH(4); + } else if (inet_pton(AF_INET6, argv[3], RTA_DATA(rta))) { + family = AF_INET6; + rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6; + rta->rta_len = RTA_LENGTH(16); + } else { + error(1, errno, "can't parse ip %s", argv[3]); + } + off += NLMSG_ALIGN(rta->rta_len); + + /* family */ + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY; + rta->rta_len = RTA_LENGTH(2); + memcpy(RTA_DATA(rta), &family, 2); + off += NLMSG_ALIGN(rta->rta_len); + } nest->rta_len = off - nest_start; do_nl_req(fd, nh, off, 0); @@ -323,6 +960,7 @@ int del_addr(int fd, int pm_family, int argc, char *argv[]) static void print_addr(struct rtattr *attrs, int len) { uint16_t family = 0; + uint16_t port = 0; char str[1024]; uint32_t flags; uint8_t id; @@ -330,12 +968,16 @@ static void print_addr(struct rtattr *attrs, int len) while (RTA_OK(attrs, len)) { if (attrs->rta_type == 
MPTCP_PM_ADDR_ATTR_FAMILY) memcpy(&family, RTA_DATA(attrs), 2); + if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_PORT) + memcpy(&port, RTA_DATA(attrs), 2); if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_ADDR4) { if (family != AF_INET) error(1, errno, "wrong IP (v4) for family %d", family); inet_ntop(AF_INET, RTA_DATA(attrs), str, sizeof(str)); printf("%s", str); + if (port) + printf(" %d", port); } if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_ADDR6) { if (family != AF_INET6) @@ -343,6 +985,8 @@ static void print_addr(struct rtattr *attrs, int len) family); inet_ntop(AF_INET6, RTA_DATA(attrs), str, sizeof(str)); printf("%s", str); + if (port) + printf(" %d", port); } if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_ID) { memcpy(&id, RTA_DATA(attrs), 1); @@ -373,6 +1017,20 @@ static void print_addr(struct rtattr *attrs, int len) printf(","); } + if (flags & MPTCP_PM_ADDR_FLAG_FULLMESH) { + printf("fullmesh"); + flags &= ~MPTCP_PM_ADDR_FLAG_FULLMESH; + if (flags) + printf(","); + } + + if (flags & MPTCP_PM_ADDR_FLAG_IMPLICIT) { + printf("implicit"); + flags &= ~MPTCP_PM_ADDR_FLAG_IMPLICIT; + if (flags) + printf(","); + } + /* bump unknown flags, if any */ if (flags) printf("0x%x", flags); @@ -429,6 +1087,7 @@ int get_addr(int fd, int pm_family, int argc, char *argv[]) 1024]; struct rtattr *rta, *nest; struct nlmsghdr *nh; + u_int32_t token = 0; int nest_start; u_int8_t id; int off = 0; @@ -439,10 +1098,12 @@ int get_addr(int fd, int pm_family, int argc, char *argv[]) MPTCP_PM_VER); /* the only argument is the address id */ - if (argc != 3) + if (argc != 3 && argc != 5) syntax(argv); id = atoi(argv[2]); + if (argc == 5 && !strcmp(argv[3], "token")) + token = strtoul(argv[4], NULL, 10); nest_start = off; nest = (void *)(data + off); @@ -458,6 +1119,15 @@ int get_addr(int fd, int pm_family, int argc, char *argv[]) off += NLMSG_ALIGN(rta->rta_len); nest->rta_len = off - nest_start; + /* token */ + if (token) { + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ATTR_TOKEN; + 
rta->rta_len = RTA_LENGTH(4); + memcpy(RTA_DATA(rta), &token, 4); + off += NLMSG_ALIGN(rta->rta_len); + } + print_addrs(nh, pm_family, do_nl_req(fd, nh, off, sizeof(data))); return 0; } @@ -469,8 +1139,16 @@ int dump_addrs(int fd, int pm_family, int argc, char *argv[]) 1024]; pid_t pid = getpid(); struct nlmsghdr *nh; + u_int32_t token = 0; + struct rtattr *rta; int off = 0; + if (argc != 2 && argc != 4) + syntax(argv); + + if (argc == 4 && !strcmp(argv[2], "token")) + token = strtoul(argv[3], NULL, 10); + memset(data, 0, sizeof(data)); nh = (void *)data; off = init_genl_req(data, pm_family, MPTCP_PM_CMD_GET_ADDR, @@ -480,6 +1158,15 @@ int dump_addrs(int fd, int pm_family, int argc, char *argv[]) nh->nlmsg_pid = pid; nh->nlmsg_len = off; + /* token */ + if (token) { + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ATTR_TOKEN; + rta->rta_len = RTA_LENGTH(4); + memcpy(RTA_DATA(rta), &token, 4); + off += NLMSG_ALIGN(rta->rta_len); + } + print_addrs(nh, pm_family, do_nl_req(fd, nh, off, sizeof(data))); return 0; } @@ -584,9 +1271,245 @@ int get_set_limits(int fd, int pm_family, int argc, char *argv[]) return 0; } +int add_listener(int argc, char *argv[]) +{ + struct sockaddr_storage addr; + struct sockaddr_in6 *a6; + struct sockaddr_in *a4; + u_int16_t family = AF_UNSPEC; + int enable = 1; + int sock; + int err; + + if (argc < 4) + syntax(argv); + + memset(&addr, 0, sizeof(struct sockaddr_storage)); + a4 = (struct sockaddr_in *)&addr; + a6 = (struct sockaddr_in6 *)&addr; + + if (inet_pton(AF_INET, argv[2], &a4->sin_addr)) { + family = AF_INET; + a4->sin_family = family; + a4->sin_port = htons(atoi(argv[3])); + } else if (inet_pton(AF_INET6, argv[2], &a6->sin6_addr)) { + family = AF_INET6; + a6->sin6_family = family; + a6->sin6_port = htons(atoi(argv[3])); + } else + error(1, errno, "can't parse ip %s", argv[2]); + + sock = socket(family, SOCK_STREAM, IPPROTO_MPTCP); + if (sock < 0) + error(1, errno, "can't create listener sock\n"); + + if (setsockopt(sock, 
SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(enable))) { + close(sock); + error(1, errno, "can't set SO_REUSEADDR on listener sock\n"); + } + + err = bind(sock, (struct sockaddr *)&addr, + ((family == AF_INET) ? sizeof(struct sockaddr_in) : + sizeof(struct sockaddr_in6))); + + if (err == 0 && listen(sock, 30) == 0) + pause(); + + close(sock); + return 0; +} + +int set_flags(int fd, int pm_family, int argc, char *argv[]) +{ + char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + + NLMSG_ALIGN(sizeof(struct genlmsghdr)) + + 1024]; + struct rtattr *rta, *nest; + struct nlmsghdr *nh; + u_int32_t flags = 0; + u_int32_t token = 0; + u_int16_t rport = 0; + u_int16_t family; + void *rip = NULL; + int nest_start; + int use_id = 0; + u_int8_t id; + int off = 0; + int arg = 2; + + memset(data, 0, sizeof(data)); + nh = (void *)data; + off = init_genl_req(data, pm_family, MPTCP_PM_CMD_SET_FLAGS, + MPTCP_PM_VER); + + if (argc < 3) + syntax(argv); + + nest_start = off; + nest = (void *)(data + off); + nest->rta_type = NLA_F_NESTED | MPTCP_PM_ATTR_ADDR; + nest->rta_len = RTA_LENGTH(0); + off += NLMSG_ALIGN(nest->rta_len); + + if (!strcmp(argv[arg], "id")) { + if (++arg >= argc) + error(1, 0, " missing id value"); + + use_id = 1; + id = atoi(argv[arg]); + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_ID; + rta->rta_len = RTA_LENGTH(1); + memcpy(RTA_DATA(rta), &id, 1); + off += NLMSG_ALIGN(rta->rta_len); + } else { + /* addr data */ + rta = (void *)(data + off); + if (inet_pton(AF_INET, argv[arg], RTA_DATA(rta))) { + family = AF_INET; + rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4; + rta->rta_len = RTA_LENGTH(4); + } else if (inet_pton(AF_INET6, argv[arg], RTA_DATA(rta))) { + family = AF_INET6; + rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6; + rta->rta_len = RTA_LENGTH(16); + } else { + error(1, errno, "can't parse ip %s", argv[arg]); + } + off += NLMSG_ALIGN(rta->rta_len); + + /* family */ + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY; + rta->rta_len 
= RTA_LENGTH(2); + memcpy(RTA_DATA(rta), &family, 2); + off += NLMSG_ALIGN(rta->rta_len); + } + + if (++arg >= argc) + error(1, 0, " missing flags keyword"); + + for (; arg < argc; arg++) { + if (!strcmp(argv[arg], "token")) { + if (++arg >= argc) + error(1, 0, " missing token value"); + + /* token */ + token = strtoul(argv[arg], NULL, 10); + } else if (!strcmp(argv[arg], "flags")) { + char *tok, *str; + + /* flags */ + if (++arg >= argc) + error(1, 0, " missing flags value"); + + for (str = argv[arg]; (tok = strtok(str, ",")); + str = NULL) { + if (!strcmp(tok, "backup")) + flags |= MPTCP_PM_ADDR_FLAG_BACKUP; + else if (!strcmp(tok, "fullmesh")) + flags |= MPTCP_PM_ADDR_FLAG_FULLMESH; + else if (strcmp(tok, "nobackup") && + strcmp(tok, "nofullmesh")) + error(1, errno, + "unknown flag %s", argv[arg]); + } + + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_FLAGS; + rta->rta_len = RTA_LENGTH(4); + memcpy(RTA_DATA(rta), &flags, 4); + off += NLMSG_ALIGN(rta->rta_len); + } else if (!strcmp(argv[arg], "port")) { + u_int16_t port; + + if (use_id) + error(1, 0, " port can't be used with id"); + + if (++arg >= argc) + error(1, 0, " missing port value"); + + port = atoi(argv[arg]); + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_PORT; + rta->rta_len = RTA_LENGTH(2); + memcpy(RTA_DATA(rta), &port, 2); + off += NLMSG_ALIGN(rta->rta_len); + } else if (!strcmp(argv[arg], "rport")) { + if (++arg >= argc) + error(1, 0, " missing remote port"); + + rport = atoi(argv[arg]); + } else if (!strcmp(argv[arg], "rip")) { + if (++arg >= argc) + error(1, 0, " missing remote ip"); + + rip = argv[arg]; + } else { + error(1, 0, "unknown keyword %s", argv[arg]); + } + } + nest->rta_len = off - nest_start; + + /* token */ + if (token) { + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ATTR_TOKEN; + rta->rta_len = RTA_LENGTH(4); + memcpy(RTA_DATA(rta), &token, 4); + off += NLMSG_ALIGN(rta->rta_len); + } + + /* remote addr/port */ + if (rip) { + 
nest_start = off; + nest = (void *)(data + off); + nest->rta_type = NLA_F_NESTED | MPTCP_PM_ATTR_ADDR_REMOTE; + nest->rta_len = RTA_LENGTH(0); + off += NLMSG_ALIGN(nest->rta_len); + + /* addr data */ + rta = (void *)(data + off); + if (inet_pton(AF_INET, rip, RTA_DATA(rta))) { + family = AF_INET; + rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4; + rta->rta_len = RTA_LENGTH(4); + } else if (inet_pton(AF_INET6, rip, RTA_DATA(rta))) { + family = AF_INET6; + rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6; + rta->rta_len = RTA_LENGTH(16); + } else { + error(1, errno, "can't parse ip %s", (char *)rip); + } + off += NLMSG_ALIGN(rta->rta_len); + + /* family */ + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY; + rta->rta_len = RTA_LENGTH(2); + memcpy(RTA_DATA(rta), &family, 2); + off += NLMSG_ALIGN(rta->rta_len); + + if (rport) { + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_PORT; + rta->rta_len = RTA_LENGTH(2); + memcpy(RTA_DATA(rta), &rport, 2); + off += NLMSG_ALIGN(rta->rta_len); + } + + nest->rta_len = off - nest_start; + } + + do_nl_req(fd, nh, off, 0); + return 0; +} + int main(int argc, char *argv[]) { - int fd, pm_family; + int events_mcast_grp; + int pm_family; + int fd; if (argc < 2) syntax(argv); @@ -595,10 +1518,18 @@ int main(int argc, char *argv[]) if (fd == -1) error(1, errno, "socket netlink"); - pm_family = resolve_mptcp_pm_netlink(fd); + resolve_mptcp_pm_netlink(fd, &pm_family, &events_mcast_grp); if (!strcmp(argv[1], "add")) return add_addr(fd, pm_family, argc, argv); + else if (!strcmp(argv[1], "ann")) + return announce_addr(fd, pm_family, argc, argv); + else if (!strcmp(argv[1], "rem")) + return remove_addr(fd, pm_family, argc, argv); + else if (!strcmp(argv[1], "csf")) + return csf(fd, pm_family, argc, argv); + else if (!strcmp(argv[1], "dsf")) + return dsf(fd, pm_family, argc, argv); else if (!strcmp(argv[1], "del")) return del_addr(fd, pm_family, argc, argv); else if (!strcmp(argv[1], "flush")) @@ -609,6 +1540,12 @@ 
int main(int argc, char *argv[])
 		return dump_addrs(fd, pm_family, argc, argv);
 	else if (!strcmp(argv[1], "limits"))
 		return get_set_limits(fd, pm_family, argc, argv);
+	else if (!strcmp(argv[1], "set"))
+		return set_flags(fd, pm_family, argc, argv);
+	else if (!strcmp(argv[1], "events"))
+		return capture_events(fd, events_mcast_grp);
+	else if (!strcmp(argv[1], "listen"))
+		return add_listener(argc, argv);
 
 	fprintf(stderr, "unknown sub-command: %s", argv[1]);
 	syntax(argv);
diff --git a/tools/testing/selftests/net/mptcp/settings b/tools/testing/selftests/net/mptcp/settings
index 026384c189c9..abc5648b59ab 100644
--- a/tools/testing/selftests/net/mptcp/settings
+++ b/tools/testing/selftests/net/mptcp/settings
@@ -1 +1 @@
-timeout=450
+timeout=1800
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
new file mode 100755
index 000000000000..f74e1c3c126d
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -0,0 +1,298 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Double quotes to prevent globbing and word splitting are recommended in new
+# code but we accept their absence, especially because there were too many
+# occurrences left after having addressed all other issues detected by
+# shellcheck.
+#shellcheck disable=SC2086
+
+. "$(dirname "${0}")/mptcp_lib.sh"
+
+ns1=""
+ns2=""
+ns3=""
+capture=false
+timeout_poll=30
+timeout_test=$((timeout_poll * 2 + 1))
+# a bit more space: because we have more to display
+MPTCP_LIB_TEST_FORMAT="%02u %-60s"
+ret=0
+bail=0
+slack=50
+large=""
+small=""
+sout=""
+cout=""
+capout=""
+size=0
+
+# Print the command line help on stdout.
+usage() {
+	echo "Usage: $0 [ -b ] [ -c ] [ -d ] [ -i]"
+	echo -e "\t-b: bail out after first error, otherwise runs all testcases"
+	echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)"
+	echo -e "\t-d: debug this script"
+	echo -e "\t-i: use 'ip mptcp' instead of 'pm_nl_ctl'"
+}
+
+# Remove the temporary files created by setup() and tear down the three
+# namespaces.
+# This function is used in the cleanup trap
+#shellcheck disable=SC2317
+cleanup()
+{
+	rm -f "$cout" "$sout"
+	rm -f "$large" "$small"
+	rm -f "$capout"
+
+	mptcp_lib_ns_exit "${ns1}" "${ns2}" "${ns3}"
+}
+
+mptcp_lib_check_mptcp
+mptcp_lib_check_tools ip tc
+
+#  "$ns1"              ns2                    ns3
+#     ns1eth1    ns2eth1   ns2eth3      ns3eth1
+#            netem
+#     ns1eth2    ns2eth2
+#            netem
+
+# Create the temporary files, the three namespaces, the veth links between
+# them, addresses and routes, and the MPTCP limits/endpoint. Also installs the
+# cleanup trap and enlarges $slack when the kernel looks like a debug build.
+setup()
+{
+	large=$(mktemp)
+	small=$(mktemp)
+	sout=$(mktemp)
+	cout=$(mktemp)
+	capout=$(mktemp)
+	size=$((2 * 2048 * 4096))
+
+	dd if=/dev/zero of=$small bs=4096 count=20 >/dev/null 2>&1
+	dd if=/dev/zero of=$large bs=4096 count=$((size / 4096)) >/dev/null 2>&1
+
+	trap cleanup EXIT
+
+	mptcp_lib_ns_init ns1 ns2 ns3
+
+	ip link add ns1eth1 netns "$ns1" type veth peer name ns2eth1 netns "$ns2"
+	ip link add ns1eth2 netns "$ns1" type veth peer name ns2eth2 netns "$ns2"
+	ip link add ns2eth3 netns "$ns2" type veth peer name ns3eth1 netns "$ns3"
+
+	ip -net "$ns1" addr add 10.0.1.1/24 dev ns1eth1
+	ip -net "$ns1" addr add dead:beef:1::1/64 dev ns1eth1 nodad
+	ip -net "$ns1" link set ns1eth1 up mtu 1500
+	ip -net "$ns1" route add default via 10.0.1.2
+	ip -net "$ns1" route add default via dead:beef:1::2
+
+	ip -net "$ns1" addr add 10.0.2.1/24 dev ns1eth2
+	ip -net "$ns1" addr add dead:beef:2::1/64 dev ns1eth2 nodad
+	ip -net "$ns1" link set ns1eth2 up mtu 1500
+	ip -net "$ns1" route add default via 10.0.2.2 metric 101
+	ip -net "$ns1" route add default via dead:beef:2::2 metric 101
+
+	mptcp_lib_pm_nl_set_limits "${ns1}" 1 1
+	mptcp_lib_pm_nl_add_endpoint "${ns1}" 10.0.2.1 dev ns1eth2 flags subflow
+
+	ip -net "$ns2" addr add 10.0.1.2/24 dev ns2eth1
+	ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad
+	ip -net "$ns2" link set ns2eth1 up mtu 1500
+
+	ip -net "$ns2" addr add 10.0.2.2/24 dev ns2eth2
+	ip -net "$ns2" addr add dead:beef:2::2/64 dev ns2eth2 nodad
+	ip -net "$ns2" link set ns2eth2 up mtu 1500
+
+	ip -net "$ns2" addr add 10.0.3.2/24 dev ns2eth3
+	ip -net "$ns2" addr add dead:beef:3::2/64 dev ns2eth3 nodad
+	ip -net "$ns2" link set ns2eth3 up mtu 1500
+	ip netns exec "$ns2" sysctl -q net.ipv4.ip_forward=1
+	ip netns exec "$ns2" sysctl -q net.ipv6.conf.all.forwarding=1
+
+	ip -net "$ns3" addr add 10.0.3.3/24 dev ns3eth1
+	ip -net "$ns3" addr add dead:beef:3::3/64 dev ns3eth1 nodad
+	ip -net "$ns3" link set ns3eth1 up mtu 1500
+	ip -net "$ns3" route add default via 10.0.3.2
+	ip -net "$ns3" route add default via dead:beef:3::2
+
+	mptcp_lib_pm_nl_set_limits "${ns3}" 1 1
+
+	# debug build can slow down measurably the test program
+	# we use quite tight time limit on the run-time, to ensure
+	# maximum B/W usage.
+	# Use kmemleak/lockdep/kasan/prove_locking presence as a rough
+	# estimate for this being a debug kernel and increase the
+	# maximum run-time accordingly. Observed run times for CI builds
+	# running selftests, including kbuild, were used to determine the
+	# amount of time to add.
+	grep -q ' kmemleak_init$\| lockdep_init$\| kasan_init$\| prove_locking$' /proc/kallsyms && slack=$((slack+550))
+}
+
+# Run one transfer between a listener in ns3 and a connector in ns1.
+# $1: file sent by the connector (ns1)
+# $2: file sent by the listener (ns3)
+# $3: maximum transfer time, passed to mptcp_connect with -T
+# Returns 0 when both mptcp_connect instances exit with 0 and each side
+# received exactly the file sent by its peer, 1 otherwise.
+do_transfer()
+{
+	local cin=$1
+	local sin=$2
+	local max_time=$3
+	local port
+	port=$((10000+MPTCP_LIB_TEST_COUNTER))
+
+	# truncate the output files left by the previous transfer
+	:> "$cout"
+	:> "$sout"
+	:> "$capout"
+
+	if $capture; then
+		local capuser
+		local rndh="${ns1:4}"
+		# quoted: an unquoted expansion breaks the test on whitespace
+		if [ -z "$SUDO_USER" ] ; then
+			capuser=""
+		else
+			capuser="-Z $SUDO_USER"
+		fi
+
+		local capfile="${rndh}-${port}"
+		local capopt="-i any -s 65535 -B 32768 ${capuser}"
+
+		ip netns exec ${ns3} tcpdump ${capopt} -w "${capfile}-listener.pcap" >> "${capout}" 2>&1 &
+		local cappid_listener=$!
+
+		ip netns exec ${ns1} tcpdump ${capopt} -w "${capfile}-connector.pcap" >> "${capout}" 2>&1 &
+		local cappid_connector=$!
+
+		sleep 1
+	fi
+
+	timeout ${timeout_test} \
+		ip netns exec ${ns3} \
+			./mptcp_connect -jt ${timeout_poll} -l -p $port -T $max_time \
+				0.0.0.0 < "$sin" > "$sout" &
+	local spid=$!
+
+	mptcp_lib_wait_local_port_listen "${ns3}" "${port}"
+
+	timeout ${timeout_test} \
+		ip netns exec ${ns1} \
+			./mptcp_connect -jt ${timeout_poll} -p $port -T $max_time \
+				10.0.3.3 < "$cin" > "$cout" &
+	local cpid=$!
+
+	wait $cpid
+	local retc=$?
+	wait $spid
+	local rets=$?
+
+	if $capture; then
+		sleep 1
+		kill ${cappid_listener}
+		kill ${cappid_connector}
+	fi
+
+	# each side must have received what the other one sent
+	cmp $sin $cout > /dev/null 2>&1
+	local cmps=$?
+	cmp $cin $sout > /dev/null 2>&1
+	local cmpc=$?
+
+	printf "%-16s" " max $max_time "
+	if [ $retc -eq 0 ] && [ $rets -eq 0 ] && \
+	   [ $cmpc -eq 0 ] && [ $cmps -eq 0 ]; then
+		mptcp_lib_pr_ok
+		cat "$capout"
+		return 0
+	fi
+
+	mptcp_lib_pr_fail
+	echo "client exit code $retc, server $rets" 1>&2
+	echo -e "\nnetns ${ns3} socket stat for $port:" 1>&2
+	ip netns exec ${ns3} ss -nita 1>&2 -o "sport = :$port"
+	echo -e "\nnetns ${ns1} socket stat for $port:" 1>&2
+	ip netns exec ${ns1} ss -nita 1>&2 -o "dport = :$port"
+	ls -l $sin $cout
+	ls -l $cin $sout
+
+	cat "$capout"
+	return 1
+}
+
+# Shape the two ns1 <-> ns2 links with netem, then run the transfer in both
+# directions.
+# $1: rate of the first link, in mbit
+# $2: rate of the second link, in mbit
+# $3: delay of the first link, in ms (0: no delay)
+# $4: delay of the second link, in ms (0: no delay)
+# remaining arguments: test title
+# A failure is stored in $ret unless the subtest is marked as flaky; with -b
+# the script exits on the first such failure.
+run_test()
+{
+	local rate1=$1
+	local rate2=$2
+	local delay1=$3
+	local delay2=$4
+	local lret
+	local dev
+	shift 4
+	local msg=$*
+
+	[ $delay1 -gt 0 ] && delay1="delay ${delay1}ms" || delay1=""
+	[ $delay2 -gt 0 ] && delay2="delay ${delay2}ms" || delay2=""
+
+	for dev in ns1eth1 ns1eth2; do
+		tc -n $ns1 qdisc del dev $dev root >/dev/null 2>&1
+	done
+	for dev in ns2eth1 ns2eth2; do
+		tc -n $ns2 qdisc del dev $dev root >/dev/null 2>&1
+	done
+	tc -n $ns1 qdisc add dev ns1eth1 root netem rate ${rate1}mbit $delay1
+	tc -n $ns1 qdisc add dev ns1eth2 root netem rate ${rate2}mbit $delay2
+	tc -n $ns2 qdisc add dev ns2eth1 root netem rate ${rate1}mbit $delay1
+	tc -n $ns2 qdisc add dev ns2eth2 root netem rate ${rate2}mbit $delay2
+
+	# time is measured in ms, account for transfer size, aggregated link speed
+	# and header overhead (10%)
+	#		ms    byte -> bit   10%        mbit      -> kbit -> bit  10%
+	local time=$((1000 * size * 8 * 10 / ((rate1 + rate2) * 1000 * 1000 * 9) ))
+
+	# mptcp_connect will do some sleeps to allow the mp_join handshake
+	# completion (see mptcp_connect): 200ms on each side, add some slack
+	time=$((time + 400 + slack))
+
+	mptcp_lib_print_title "$msg"
+	do_transfer $small $large $time
+	lret=$?
+	mptcp_lib_result_code "${lret}" "${msg}"
+	if [ $lret -ne 0 ] && ! mptcp_lib_subtest_is_flaky; then
+		ret=$lret
+		[ $bail -eq 0 ] || exit $ret
+	fi
+
+	msg+=" - reverse direction"
+	mptcp_lib_print_title "${msg}"
+	do_transfer $large $small $time
+	lret=$?
+	mptcp_lib_result_code "${lret}" "${msg}"
+	if [ $lret -ne 0 ] && ! mptcp_lib_subtest_is_flaky; then
+		ret=$lret
+		[ $bail -eq 0 ] || exit $ret
+	fi
+}
+
+while getopts "bcdhi" option;do
+	case "$option" in
+	"h")
+		usage $0
+		exit ${KSFT_PASS}
+		;;
+	"b")
+		bail=1
+		;;
+	"c")
+		capture=true
+		;;
+	"d")
+		set -x
+		;;
+	"i")
+		mptcp_lib_set_ip_mptcp
+		;;
+	"?")
+		usage $0
+		exit ${KSFT_FAIL}
+		;;
+	esac
+done
+
+setup
+run_test 10 10 0 0 "balanced bwidth"
+run_test 10 10 1 25 "balanced bwidth with unbalanced delay"
+
+# we still need some additional infrastructure to pass the following test-cases
+MPTCP_LIB_SUBTEST_FLAKY=1 run_test 10 3 0 0 "unbalanced bwidth"
+run_test 10 3 1 25 "unbalanced bwidth with unbalanced delay"
+run_test 10 3 25 1 "unbalanced bwidth with opposed, unbalanced delay"
+
+mptcp_lib_result_print_all_tap
+exit $ret
diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh
new file mode 100755
index 000000000000..9e2981f2d7f5
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh
@@ -0,0 +1,897 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Double quotes to prevent globbing and word splitting is recommended in new
+# code but we accept it.
+#shellcheck disable=SC2086
+
+# Some variables are used below but indirectly, see verify_*_event()
+#shellcheck disable=SC2034
+
+. "$(dirname "${0}")/mptcp_lib.sh"
+
+mptcp_lib_check_mptcp
+mptcp_lib_check_kallsyms
+
+if !
mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
+	echo "userspace pm tests are not supported by the kernel: SKIP"
+	exit ${KSFT_SKIP}
+fi
+mptcp_lib_check_tools ip
+
+# Short aliases for the event type values exported by mptcp_lib.sh
+ANNOUNCED=${MPTCP_LIB_EVENT_ANNOUNCED}
+REMOVED=${MPTCP_LIB_EVENT_REMOVED}
+SUB_ESTABLISHED=${MPTCP_LIB_EVENT_SUB_ESTABLISHED}
+SUB_CLOSED=${MPTCP_LIB_EVENT_SUB_CLOSED}
+LISTENER_CREATED=${MPTCP_LIB_EVENT_LISTENER_CREATED}
+LISTENER_CLOSED=${MPTCP_LIB_EVENT_LISTENER_CLOSED}
+
+AF_INET=${MPTCP_LIB_AF_INET}
+AF_INET6=${MPTCP_LIB_AF_INET6}
+
+# Global state, filled in by the setup code below and by make_connection()
+file=""
+server_evts=""
+client_evts=""
+server_evts_pid=0
+client_evts_pid=0
+client4_pid=0
+server4_pid=0
+client6_pid=0
+server6_pid=0
+client4_token=""
+server4_token=""
+client6_token=""
+server6_token=""
+client4_port=0;
+client6_port=0;
+app4_port=50002
+new4_port=50003
+app6_port=50004
+# address ids: the first (at most) two characters of $RANDOM
+client_addr_id=${RANDOM:0:2}
+server_addr_id=${RANDOM:0:2}
+
+ns1=""
+ns2=""
+ret=0
+test_name=""
+# a bit more space: because we have more to display
+MPTCP_LIB_TEST_FORMAT="%02u %-68s"
+
+# Print a section title as an informational message.
+# $1: title
+print_title()
+{
+	mptcp_lib_pr_info "${1}"
+}
+
+# Remember the name of the current test in $test_name and print its title.
+# $1: test name
+print_test()
+{
+	test_name="${1}"
+
+	mptcp_lib_print_title "${test_name}"
+}
+
+# Report the test named by $test_name as passed.
+test_pass()
+{
+	mptcp_lib_pr_ok
+	mptcp_lib_result_pass "${test_name}"
+}
+
+# Report the test named by $test_name as skipped.
+test_skip()
+{
+	mptcp_lib_pr_skip
+	mptcp_lib_result_skip "${test_name}"
+}
+
+# Report the test named by $test_name as failed and set $ret to KSFT_FAIL.
+# The failure message is only printed when at least one argument is given.
+# $1: msg
+test_fail()
+{
+	if [ ${#} -gt 0 ]
+	then
+		mptcp_lib_pr_fail "${@}"
+	fi
+	ret=${KSFT_FAIL}
+	mptcp_lib_result_fail "${test_name}"
+}
+
+# This function is used in the cleanup trap
+#shellcheck disable=SC2317
+cleanup()
+{
+	print_title "Cleanup"
+
+	# Terminate the MPTCP connection and related processes
+	local pid
+	for pid in $client4_pid $server4_pid $client6_pid $server6_pid\
+		   $server_evts_pid $client_evts_pid
+	do
+		mptcp_lib_kill_wait $pid
+	done
+
+	mptcp_lib_ns_exit "${ns1}" "${ns2}"
+
+	rm -rf $file $client_evts $server_evts
+
+	mptcp_lib_pr_info "Done"
+}
+
+trap cleanup EXIT
+
+# Create and configure network namespaces for testing
+mptcp_lib_ns_init ns1 ns2
+for i in "$ns1" "$ns2" ;do
+	ip netns exec "$i" sysctl -q net.mptcp.pm_type=1
+done
+
+#  "$ns1"              ns2
+#     ns1eth2    ns2eth1
+
+ip link add ns1eth2 netns "$ns1" type veth peer name ns2eth1 netns "$ns2"
+
+# Add IPv4/v6 addresses to the namespaces
+ip -net "$ns1" addr add 10.0.1.1/24 dev ns1eth2
+ip -net "$ns1" addr add 10.0.2.1/24 dev ns1eth2
+ip -net "$ns1" addr add dead:beef:1::1/64 dev ns1eth2 nodad
+ip -net "$ns1" addr add dead:beef:2::1/64 dev ns1eth2 nodad
+ip -net "$ns1" link set ns1eth2 up
+
+ip -net "$ns2" addr add 10.0.1.2/24 dev ns2eth1
+ip -net "$ns2" addr add 10.0.2.2/24 dev ns2eth1
+ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad
+ip -net "$ns2" addr add dead:beef:2::2/64 dev ns2eth1 nodad
+ip -net "$ns2" link set ns2eth1 up
+
+file=$(mktemp)
+mptcp_lib_make_file "$file" 2 1
+
+# Capture netlink events over the two network namespaces running
+# the MPTCP client and server
+client_evts=$(mktemp)
+mptcp_lib_events "${ns2}" "${client_evts}" client_evts_pid
+server_evts=$(mktemp)
+mptcp_lib_events "${ns1}" "${server_evts}" server_evts_pid
+sleep 0.5
+
+print_title "Init"
+print_test "Created network namespaces ns1, ns2"
+test_pass
+
+# Start an mptcp_connect server in ns1 and a client in ns2, then read the
+# connection attributes back from the captured events.
+# $1: "v6" for an IPv6 connection, anything else selects IPv4
+# On success the token, source port and pids are stored in the client4_*/
+# server4_* or client6_*/server6_* globals; otherwise the script prints the
+# results and exits with KSFT_FAIL.
+make_connection()
+{
+	local is_v6=$1
+	local app_port=$app4_port
+	local connect_addr="10.0.1.1"
+	local listen_addr="0.0.0.0"
+	if [ "$is_v6" = "v6" ]
+	then
+		connect_addr="dead:beef:1::1"
+		listen_addr="::"
+		app_port=$app6_port
+	else
+		is_v6="v4"
+	fi
+
+	# start from empty event files
+	:>"$client_evts"
+	:>"$server_evts"
+
+	# Run the server
+	ip netns exec "$ns1" \
+	   ./mptcp_connect -s MPTCP -w 300 -p $app_port -l $listen_addr > /dev/null 2>&1 &
+	local server_pid=$!
+	sleep 0.5
+
+	# Run the client, transfer $file and stay connected to the server
+	# to conduct tests
+	# NOTE(review): "2>&1" comes before "> /dev/null" here, so only stdout
+	# is discarded and stderr still goes to the script's stdout, unlike
+	# the server above -- confirm this is intended.
+	ip netns exec "$ns2" \
+	   ./mptcp_connect -s MPTCP -w 300 -m sendfile -p $app_port $connect_addr\
+	   2>&1 > /dev/null < "$file" &
+	local client_pid=$!
+	sleep 1
+
+	# Capture client/server attributes from MPTCP connection netlink events
+
+	local client_token
+	local client_port
+	local client_serverside
+	local server_token
+	local server_serverside
+
+	client_token=$(mptcp_lib_evts_get_info token "$client_evts")
+	client_port=$(mptcp_lib_evts_get_info sport "$client_evts")
+	client_serverside=$(mptcp_lib_evts_get_info server_side "$client_evts")
+	server_token=$(mptcp_lib_evts_get_info token "$server_evts")
+	server_serverside=$(mptcp_lib_evts_get_info server_side "$server_evts")
+
+	print_test "Established IP${is_v6} MPTCP Connection ns2 => ns1"
+	if [ "$client_token" != "" ] && [ "$server_token" != "" ] && [ "$client_serverside" = 0 ] &&
+		   [ "$server_serverside" = 1 ]
+	then
+		test_pass
+	else
+		test_fail "Expected tokens (c:${client_token} - s:${server_token}) and server (c:${client_serverside} - s:${server_serverside})"
+		mptcp_lib_result_print_all_tap
+		exit ${KSFT_FAIL}
+	fi
+
+	if [ "$is_v6" = "v6" ]
+	then
+		client6_token=$client_token
+		server6_token=$server_token
+		client6_port=$client_port
+		client6_pid=$client_pid
+		server6_pid=$server_pid
+	else
+		client4_token=$client_token
+		server4_token=$server_token
+		client4_port=$client_port
+		client4_pid=$client_pid
+		server4_pid=$server_pid
+	fi
+}
+
+# Report the current test as passed or failed depending on the result of
+# mptcp_lib_check_expected. That helper is defined in mptcp_lib.sh, not
+# visible here; presumably it compares each named variable with its "e_"
+# prefixed counterpart set by the caller -- TODO confirm.
+# $@: all var names to check
+check_expected()
+{
+	if mptcp_lib_check_expected "${@}"
+	then
+		test_pass
+		return 0
+	fi
+
+	test_fail
+	return 1
+}
+
+# Read the fields of an "announced" event and check them.
+# $1: events file
+# $2: expected event type
+# $3: expected token
+# $4: expected address
+# $5: expected remote address id
+# $6: expected destination port
+# $7: "v6" when the address is an IPv6 one (optional)
+verify_announce_event()
+{
+	local evt=$1
+	local e_type=$2
+	local e_token=$3
+	local e_addr=$4
+	local e_id=$5
+	local e_dport=$6
+	local e_af=$7
+	local type
+	local token
+	local addr
+	local dport
+	local id
+
+	type=$(mptcp_lib_evts_get_info type "$evt" $e_type)
+	token=$(mptcp_lib_evts_get_info token "$evt" $e_type)
+	if [ "$e_af" = "v6" ]
+	then
+		addr=$(mptcp_lib_evts_get_info daddr6 "$evt" $e_type)
+	else
+		addr=$(mptcp_lib_evts_get_info daddr4 "$evt" $e_type)
+	fi
+	dport=$(mptcp_lib_evts_get_info dport "$evt" $e_type)
+	id=$(mptcp_lib_evts_get_info rem_id "$evt" $e_type)
+
+	check_expected "type" "token" "addr" "dport" "id"
+}
+
+# Announce addresses in both directions with pm_nl_ctl "ann" and verify the
+# event seen by the peer; an invalid token must not produce any event.
+test_announce()
+{
+	print_title "Announce tests"
+
+	# Capture events on the network namespace running the server
+	:>"$server_evts"
+
+	# ADD_ADDR using an invalid token should result in no action
+	local invalid_token=$(( client4_token - 1))
+	ip netns exec "$ns2" ./pm_nl_ctl ann 10.0.2.2 token $invalid_token id\
+	   $client_addr_id dev ns2eth1 > /dev/null 2>&1
+
+	local type
+	type=$(mptcp_lib_evts_get_info type "$server_evts")
+	print_test "ADD_ADDR 10.0.2.2 (ns2) => ns1, invalid token"
+	if [ "$type" = "" ]
+	then
+		test_pass
+	else
+		test_fail "type defined: ${type}"
+	fi
+
+	# ADD_ADDR from the client to server machine reusing the subflow port
+	:>"$server_evts"
+	ip netns exec "$ns2"\
+	   ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id $client_addr_id dev\
+	   ns2eth1
+	print_test "ADD_ADDR id:${client_addr_id} 10.0.2.2 (ns2) => ns1, reuse port"
+	sleep 0.5
+	verify_announce_event $server_evts $ANNOUNCED $server4_token "10.0.2.2" $client_addr_id \
+			      "$client4_port"
+
+	# ADD_ADDR6 from the client to server machine reusing the subflow port
+	:>"$server_evts"
+	ip netns exec "$ns2" ./pm_nl_ctl ann\
+	   dead:beef:2::2 token "$client6_token" id $client_addr_id dev ns2eth1
+	print_test "ADD_ADDR6 id:${client_addr_id} dead:beef:2::2 (ns2) => ns1, reuse port"
+	sleep 0.5
+	verify_announce_event "$server_evts" "$ANNOUNCED" "$server6_token" "dead:beef:2::2"\
+			      "$client_addr_id" "$client6_port" "v6"
+
+	# ADD_ADDR from the client to server machine using a new port
+	:>"$server_evts"
+	client_addr_id=$((client_addr_id+1))
+	ip netns exec "$ns2" ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id\
+	   $client_addr_id dev ns2eth1 port $new4_port
+	print_test "ADD_ADDR id:${client_addr_id} 10.0.2.2 (ns2) => ns1, new port"
+	sleep 0.5
+	verify_announce_event "$server_evts" "$ANNOUNCED" "$server4_token" "10.0.2.2"\
+			      "$client_addr_id" "$new4_port"
+
+	# Capture events on the network namespace running the client
+	:>"$client_evts"
+
+	# ADD_ADDR from the server to client machine reusing the subflow port
+	ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\
+	   $server_addr_id dev ns1eth2
+	print_test "ADD_ADDR id:${server_addr_id} 10.0.2.1 (ns1) => ns2, reuse port"
+	sleep 0.5
+	verify_announce_event "$client_evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\
+			      "$server_addr_id" "$app4_port"
+
+	# ADD_ADDR6 from the server to client machine reusing the subflow port
+	:>"$client_evts"
+	ip netns exec "$ns1" ./pm_nl_ctl ann dead:beef:2::1 token "$server6_token" id\
+	   $server_addr_id dev ns1eth2
+	print_test "ADD_ADDR6 id:${server_addr_id} dead:beef:2::1 (ns1) => ns2, reuse port"
+	sleep 0.5
+	verify_announce_event "$client_evts" "$ANNOUNCED" "$client6_token" "dead:beef:2::1"\
+			      "$server_addr_id" "$app6_port" "v6"
+
+	# ADD_ADDR from the server to client machine using a new port
+	:>"$client_evts"
+	server_addr_id=$((server_addr_id+1))
+	ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\
+	   $server_addr_id dev ns1eth2 port $new4_port
+	print_test "ADD_ADDR id:${server_addr_id} 10.0.2.1 (ns1) => ns2, new port"
+	sleep 0.5
+	verify_announce_event "$client_evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\
+			      "$server_addr_id" "$new4_port"
+}
+
+# Read the fields of a "removed" event and check them.
+# $1: events file
+# $2: expected event type
+# $3: expected token
+# $4: expected remote address id
+verify_remove_event()
+{
+	local evt=$1
+	local e_type=$2
+	local e_token=$3
+	local e_id=$4
+	local type
+	local token
+	local id
+
+	type=$(mptcp_lib_evts_get_info type "$evt" $e_type)
+	token=$(mptcp_lib_evts_get_info token "$evt" $e_type)
+	id=$(mptcp_lib_evts_get_info rem_id "$evt" $e_type)
+
+	check_expected "type" "token" "id"
+}
+
+# Remove addresses in both directions with pm_nl_ctl "rem" and verify the
+# event seen by the peer; an invalid token or id must not produce any event.
+test_remove()
+{
+	print_title "Remove tests"
+
+	# Capture events on the network namespace running the server
+	:>"$server_evts"
+
+	# RM_ADDR using an invalid token should result in no action
+	local invalid_token=$(( client4_token - 1 ))
+	ip netns exec "$ns2" ./pm_nl_ctl rem token $invalid_token id\
+	   $client_addr_id > /dev/null 2>&1
+	print_test "RM_ADDR id:${client_addr_id} ns2 => ns1, invalid token"
+	local type
+	type=$(mptcp_lib_evts_get_info type "$server_evts")
+	if [ "$type" = "" ]
+	then
+		test_pass
+	else
+		test_fail "unexpected type: ${type}"
+	fi
+
+	# RM_ADDR using an invalid addr id should result in no action
+	local invalid_id=$(( client_addr_id + 1 ))
+	ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\
+	   $invalid_id > /dev/null 2>&1
+	print_test "RM_ADDR id:${invalid_id} ns2 => ns1, invalid id"
+	type=$(mptcp_lib_evts_get_info type "$server_evts")
+	if [ "$type" = "" ]
+	then
+		test_pass
+	else
+		test_fail "unexpected type: ${type}"
+	fi
+
+	# RM_ADDR from the client to server machine
+	:>"$server_evts"
+	ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\
+	   $client_addr_id
+	print_test "RM_ADDR id:${client_addr_id} ns2 => ns1"
+	sleep 0.5
+	verify_remove_event "$server_evts" "$REMOVED" "$server4_token" "$client_addr_id"
+
+	# RM_ADDR from the client to server machine
+	:>"$server_evts"
+	client_addr_id=$(( client_addr_id - 1 ))
+	ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\
+	   $client_addr_id
+	print_test "RM_ADDR id:${client_addr_id} ns2 => ns1"
+	sleep 0.5
+	verify_remove_event "$server_evts" "$REMOVED" "$server4_token" "$client_addr_id"
+
+	# RM_ADDR6 from the client to server machine
+	:>"$server_evts"
+	ip netns exec "$ns2" ./pm_nl_ctl rem token "$client6_token" id\
+	   $client_addr_id
+	print_test "RM_ADDR6 id:${client_addr_id} ns2 => ns1"
+	sleep 0.5
+	verify_remove_event "$server_evts" "$REMOVED" "$server6_token" "$client_addr_id"
+
+	# Capture events on the network namespace running the client
+	:>"$client_evts"
+
+	# RM_ADDR from the server to client machine
+	ip netns exec "$ns1" ./pm_nl_ctl rem token "$server4_token" id\
+	   $server_addr_id
+	print_test "RM_ADDR id:${server_addr_id} ns1 => ns2"
+	sleep 0.5
+	verify_remove_event "$client_evts" "$REMOVED" "$client4_token" "$server_addr_id"
+
+	# RM_ADDR from the server to client machine
+	:>"$client_evts"
+	server_addr_id=$(( server_addr_id - 1 ))
+	ip netns exec "$ns1" ./pm_nl_ctl rem token "$server4_token" id\
+	   $server_addr_id
+	print_test "RM_ADDR id:${server_addr_id} ns1 => ns2"
+	sleep 0.5
+	verify_remove_event "$client_evts" "$REMOVED" "$client4_token" "$server_addr_id"
+
+	# RM_ADDR6 from the server to client machine
+	:>"$client_evts"
+	ip netns exec "$ns1" ./pm_nl_ctl rem token "$server6_token" id\
+	   $server_addr_id
+	print_test "RM_ADDR6 id:${server_addr_id} ns1 => ns2"
+	sleep 0.5
+	verify_remove_event "$client_evts" "$REMOVED" "$client6_token" "$server_addr_id"
+}
+
+# Print the test title for a subflow event, then read the event fields and
+# check them.
+# $1: events file
+# $2: expected event type ($SUB_ESTABLISHED selects the CREATE_SUBFLOW title,
+#     any other value the DESTROY_SUBFLOW one)
+# $3: expected token
+# $4: expected address family
+# $5: expected source address
+# $6: expected destination address
+# $7: expected destination port
+# $8: expected local address id
+# $9: expected remote address id
+# 10th and 11th arguments: names of the source and destination namespaces,
+# only used in the title
+verify_subflow_events()
+{
+	local evt=$1
+	local e_type=$2
+	local e_token=$3
+	local e_family=$4
+	local e_saddr=$5
+	local e_daddr=$6
+	local e_dport=$7
+	local e_locid=$8
+	local e_remid=$9
+	# after dropping two arguments, the 10th and 11th become $8 and $9
+	shift 2
+	local e_from=$8
+	local e_to=$9
+	local type
+	local token
+	local family
+	local saddr
+	local daddr
+	local dport
+	local locid
+	local remid
+	local info
+
+	info="${e_saddr} (${e_from}) => ${e_daddr}:${e_dport} (${e_to})"
+
+	if [ "$e_type" = "$SUB_ESTABLISHED" ]
+	then
+		if [ "$e_family" = "$AF_INET6" ]
+		then
+			print_test "CREATE_SUBFLOW6 ${info}"
+		else
+			print_test "CREATE_SUBFLOW ${info}"
+		fi
+	else
+		if [ "$e_family" = "$AF_INET6" ]
+		then
+			print_test "DESTROY_SUBFLOW6 ${info}"
+		else
+			print_test "DESTROY_SUBFLOW ${info}"
+		fi
+	fi
+
+	type=$(mptcp_lib_evts_get_info type "$evt" $e_type)
+	token=$(mptcp_lib_evts_get_info token "$evt" $e_type)
+	family=$(mptcp_lib_evts_get_info family "$evt" $e_type)
+	dport=$(mptcp_lib_evts_get_info dport "$evt" $e_type)
+	locid=$(mptcp_lib_evts_get_info loc_id "$evt" $e_type)
+	remid=$(mptcp_lib_evts_get_info rem_id "$evt" $e_type)
+	# the address fields to read depend on the family found in the event
+	if [ "$family" = "$AF_INET6" ]
+	then
+		saddr=$(mptcp_lib_evts_get_info saddr6 "$evt" $e_type)
+		daddr=$(mptcp_lib_evts_get_info daddr6 "$evt" $e_type)
+	else
+		saddr=$(mptcp_lib_evts_get_info saddr4 "$evt" $e_type)
+		daddr=$(mptcp_lib_evts_get_info daddr4 "$evt" $e_type)
+	fi
+
+	check_expected "type" "token" "daddr" "dport" "family" "saddr" "locid" "remid"
+}
+
+test_subflows()
+{
+	print_title "Subflows v4 or v6 only tests"
+
+	# Capture events on the network namespace running the server
+	:>"$server_evts"
+
+	# Attempt to add a listener at 10.0.2.2:<subflow-port>
+	ip netns exec "$ns2" ./pm_nl_ctl listen 10.0.2.2\
+	   "$client4_port" &
+	local listener_pid=$!
+
+	# ADD_ADDR from client to server machine reusing the subflow port
+	ip netns exec "$ns2" ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id\
+	   $client_addr_id
+	sleep 0.5
+
+	# CREATE_SUBFLOW from server to client machine
+	:>"$server_evts"
+	ip netns exec "$ns1" ./pm_nl_ctl csf lip 10.0.2.1 lid 23 rip 10.0.2.2\
+	   rport "$client4_port" token "$server4_token"
+	sleep 0.5
+	verify_subflow_events $server_evts $SUB_ESTABLISHED $server4_token $AF_INET "10.0.2.1" \
+			      "10.0.2.2" "$client4_port" "23" "$client_addr_id" "ns1" "ns2"
+
+	# Delete the listener from the client ns, if one was created
+	mptcp_lib_kill_wait $listener_pid
+
+	local sport
+	sport=$(mptcp_lib_evts_get_info sport "$server_evts" $SUB_ESTABLISHED)
+
+	# DESTROY_SUBFLOW from server to client machine
+	:>"$server_evts"
+	ip netns exec "$ns1" ./pm_nl_ctl dsf lip 10.0.2.1 lport "$sport" rip 10.0.2.2 rport\
+	   "$client4_port" token "$server4_token"
+	sleep 0.5
+	verify_subflow_events "$server_evts" "$SUB_CLOSED" "$server4_token" "$AF_INET" "10.0.2.1"\
+			      "10.0.2.2" "$client4_port" "23" "$client_addr_id" "ns1" "ns2"
+
+	# RM_ADDR from client to server machine
+	ip netns exec "$ns2" ./pm_nl_ctl rem id $client_addr_id token\
+	   "$client4_token"
+	sleep 0.5
+
+	# Attempt to add a listener at dead:beef:2::2:<subflow-port>
+	ip netns exec "$ns2" ./pm_nl_ctl listen dead:beef:2::2\
+	   "$client6_port" &
+	listener_pid=$!
+ + # ADD_ADDR6 from client to server machine reusing the subflow port + :>"$server_evts" + ip netns exec "$ns2" ./pm_nl_ctl ann dead:beef:2::2 token "$client6_token" id\ + $client_addr_id + sleep 0.5 + + # CREATE_SUBFLOW6 from server to client machine + :>"$server_evts" + ip netns exec "$ns1" ./pm_nl_ctl csf lip dead:beef:2::1 lid 23 rip\ + dead:beef:2::2 rport "$client6_port" token "$server6_token" + sleep 0.5 + verify_subflow_events "$server_evts" "$SUB_ESTABLISHED" "$server6_token" "$AF_INET6"\ + "dead:beef:2::1" "dead:beef:2::2" "$client6_port" "23"\ + "$client_addr_id" "ns1" "ns2" + + # Delete the listener from the client ns, if one was created + mptcp_lib_kill_wait $listener_pid + + sport=$(mptcp_lib_evts_get_info sport "$server_evts" $SUB_ESTABLISHED) + + # DESTROY_SUBFLOW6 from server to client machine + :>"$server_evts" + ip netns exec "$ns1" ./pm_nl_ctl dsf lip dead:beef:2::1 lport "$sport" rip\ + dead:beef:2::2 rport "$client6_port" token "$server6_token" + sleep 0.5 + verify_subflow_events "$server_evts" "$SUB_CLOSED" "$server6_token" "$AF_INET6"\ + "dead:beef:2::1" "dead:beef:2::2" "$client6_port" "23"\ + "$client_addr_id" "ns1" "ns2" + + # RM_ADDR from client to server machine + ip netns exec "$ns2" ./pm_nl_ctl rem id $client_addr_id token\ + "$client6_token" + sleep 0.5 + + # Attempt to add a listener at 10.0.2.2:<new-port> + ip netns exec "$ns2" ./pm_nl_ctl listen 10.0.2.2\ + $new4_port & + listener_pid=$! 
+ + # ADD_ADDR from client to server machine using a new port + :>"$server_evts" + ip netns exec "$ns2" ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id\ + $client_addr_id port $new4_port + sleep 0.5 + + # CREATE_SUBFLOW from server to client machine + :>"$server_evts" + ip netns exec "$ns1" ./pm_nl_ctl csf lip 10.0.2.1 lid 23 rip 10.0.2.2 rport\ + $new4_port token "$server4_token" + sleep 0.5 + verify_subflow_events "$server_evts" "$SUB_ESTABLISHED" "$server4_token" "$AF_INET"\ + "10.0.2.1" "10.0.2.2" "$new4_port" "23"\ + "$client_addr_id" "ns1" "ns2" + + # Delete the listener from the client ns, if one was created + mptcp_lib_kill_wait $listener_pid + + sport=$(mptcp_lib_evts_get_info sport "$server_evts" $SUB_ESTABLISHED) + + # DESTROY_SUBFLOW from server to client machine + :>"$server_evts" + ip netns exec "$ns1" ./pm_nl_ctl dsf lip 10.0.2.1 lport "$sport" rip 10.0.2.2 rport\ + $new4_port token "$server4_token" + sleep 0.5 + verify_subflow_events "$server_evts" "$SUB_CLOSED" "$server4_token" "$AF_INET" "10.0.2.1"\ + "10.0.2.2" "$new4_port" "23" "$client_addr_id" "ns1" "ns2" + + # RM_ADDR from client to server machine + ip netns exec "$ns2" ./pm_nl_ctl rem id $client_addr_id token\ + "$client4_token" + + # Capture events on the network namespace running the client + :>"$client_evts" + + # Attempt to add a listener at 10.0.2.1:<subflow-port> + ip netns exec "$ns1" ./pm_nl_ctl listen 10.0.2.1\ + $app4_port & + listener_pid=$! 
+ + # ADD_ADDR from server to client machine reusing the subflow port + ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\ + $server_addr_id + sleep 0.5 + + # CREATE_SUBFLOW from client to server machine + :>"$client_evts" + ip netns exec "$ns2" ./pm_nl_ctl csf lip 10.0.2.2 lid 23 rip 10.0.2.1 rport\ + $app4_port token "$client4_token" + sleep 0.5 + verify_subflow_events $client_evts $SUB_ESTABLISHED $client4_token $AF_INET "10.0.2.2"\ + "10.0.2.1" "$app4_port" "23" "$server_addr_id" "ns2" "ns1" + + # Delete the listener from the server ns, if one was created + mptcp_lib_kill_wait $listener_pid + + sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED) + + # DESTROY_SUBFLOW from client to server machine + :>"$client_evts" + ip netns exec "$ns2" ./pm_nl_ctl dsf lip 10.0.2.2 lport "$sport" rip 10.0.2.1 rport\ + $app4_port token "$client4_token" + sleep 0.5 + verify_subflow_events "$client_evts" "$SUB_CLOSED" "$client4_token" "$AF_INET" "10.0.2.2"\ + "10.0.2.1" "$app4_port" "23" "$server_addr_id" "ns2" "ns1" + + # RM_ADDR from server to client machine + ip netns exec "$ns1" ./pm_nl_ctl rem id $server_addr_id token\ + "$server4_token" + sleep 0.5 + + # Attempt to add a listener at dead:beef:2::1:<subflow-port> + ip netns exec "$ns1" ./pm_nl_ctl listen dead:beef:2::1\ + $app6_port & + listener_pid=$! 
+ + # ADD_ADDR6 from server to client machine reusing the subflow port + :>"$client_evts" + ip netns exec "$ns1" ./pm_nl_ctl ann dead:beef:2::1 token "$server6_token" id\ + $server_addr_id + sleep 0.5 + + # CREATE_SUBFLOW6 from client to server machine + :>"$client_evts" + ip netns exec "$ns2" ./pm_nl_ctl csf lip dead:beef:2::2 lid 23 rip\ + dead:beef:2::1 rport $app6_port token "$client6_token" + sleep 0.5 + verify_subflow_events "$client_evts" "$SUB_ESTABLISHED" "$client6_token"\ + "$AF_INET6" "dead:beef:2::2"\ + "dead:beef:2::1" "$app6_port" "23"\ + "$server_addr_id" "ns2" "ns1" + + # Delete the listener from the server ns, if one was created + mptcp_lib_kill_wait $listener_pid + + sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED) + + # DESTROY_SUBFLOW6 from client to server machine + :>"$client_evts" + ip netns exec "$ns2" ./pm_nl_ctl dsf lip dead:beef:2::2 lport "$sport" rip\ + dead:beef:2::1 rport $app6_port token "$client6_token" + sleep 0.5 + verify_subflow_events $client_evts $SUB_CLOSED $client6_token $AF_INET6 "dead:beef:2::2"\ + "dead:beef:2::1" "$app6_port" "23" "$server_addr_id" "ns2" "ns1" + + # RM_ADDR6 from server to client machine + ip netns exec "$ns1" ./pm_nl_ctl rem id $server_addr_id token\ + "$server6_token" + sleep 0.5 + + # Attempt to add a listener at 10.0.2.1:<new-port> + ip netns exec "$ns1" ./pm_nl_ctl listen 10.0.2.1\ + $new4_port & + listener_pid=$! 
+ + # ADD_ADDR from server to client machine using a new port + :>"$client_evts" + ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\ + $server_addr_id port $new4_port + sleep 0.5 + + # CREATE_SUBFLOW from client to server machine + :>"$client_evts" + ip netns exec "$ns2" ./pm_nl_ctl csf lip 10.0.2.2 lid 23 rip 10.0.2.1 rport\ + $new4_port token "$client4_token" + sleep 0.5 + verify_subflow_events "$client_evts" "$SUB_ESTABLISHED" "$client4_token" "$AF_INET"\ + "10.0.2.2" "10.0.2.1" "$new4_port" "23" "$server_addr_id" "ns2" "ns1" + + # Delete the listener from the server ns, if one was created + mptcp_lib_kill_wait $listener_pid + + sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED) + + # DESTROY_SUBFLOW from client to server machine + :>"$client_evts" + ip netns exec "$ns2" ./pm_nl_ctl dsf lip 10.0.2.2 lport "$sport" rip 10.0.2.1 rport\ + $new4_port token "$client4_token" + sleep 0.5 + verify_subflow_events "$client_evts" "$SUB_CLOSED" "$client4_token" "$AF_INET" "10.0.2.2"\ + "10.0.2.1" "$new4_port" "23" "$server_addr_id" "ns2" "ns1" + + # RM_ADDR from server to client machine + ip netns exec "$ns1" ./pm_nl_ctl rem id $server_addr_id token\ + "$server4_token" +} + +test_subflows_v4_v6_mix() +{ + print_title "Subflows v4 and v6 mix tests" + + # Attempt to add a listener at 10.0.2.1:<subflow-port> + ip netns exec "$ns1" ./pm_nl_ctl listen 10.0.2.1\ + $app6_port & + local listener_pid=$! 
+ + # ADD_ADDR4 from server to client machine reusing the subflow port on + # the established v6 connection + :>"$client_evts" + ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server6_token" id\ + $server_addr_id dev ns1eth2 + print_test "ADD_ADDR4 id:${server_addr_id} 10.0.2.1 (ns1) => ns2, reuse port" + sleep 0.5 + verify_announce_event "$client_evts" "$ANNOUNCED" "$client6_token" "10.0.2.1"\ + "$server_addr_id" "$app6_port" + + # CREATE_SUBFLOW from client to server machine + :>"$client_evts" + ip netns exec "$ns2" ./pm_nl_ctl csf lip 10.0.2.2 lid 23 rip 10.0.2.1 rport\ + $app6_port token "$client6_token" + sleep 0.5 + verify_subflow_events "$client_evts" "$SUB_ESTABLISHED" "$client6_token"\ + "$AF_INET" "10.0.2.2" "10.0.2.1" "$app6_port" "23"\ + "$server_addr_id" "ns2" "ns1" + + # Delete the listener from the server ns, if one was created + mptcp_lib_kill_wait $listener_pid + + sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED) + + # DESTROY_SUBFLOW from client to server machine + :>"$client_evts" + ip netns exec "$ns2" ./pm_nl_ctl dsf lip 10.0.2.2 lport "$sport" rip 10.0.2.1 rport\ + $app6_port token "$client6_token" + sleep 0.5 + verify_subflow_events "$client_evts" "$SUB_CLOSED" "$client6_token" \ + "$AF_INET" "10.0.2.2" "10.0.2.1" "$app6_port" "23"\ + "$server_addr_id" "ns2" "ns1" + + # RM_ADDR from server to client machine + ip netns exec "$ns1" ./pm_nl_ctl rem id $server_addr_id token\ + "$server6_token" + sleep 0.5 +} + +test_prio() +{ + print_title "Prio tests" + + local count + + # Send MP_PRIO signal from client to server machine + ip netns exec "$ns2" ./pm_nl_ctl set 10.0.1.2 port "$client4_port" flags backup token "$client4_token" rip 10.0.1.1 rport "$app4_port" + sleep 0.5 + + # Check TX + print_test "MP_PRIO TX" + count=$(mptcp_lib_get_counter "$ns2" "MPTcpExtMPPrioTx") + if [ -z "$count" ]; then + test_skip + elif [ $count != 1 ]; then + test_fail "Count != 1: ${count}" + else + test_pass + fi + + # Check RX + 
print_test "MP_PRIO RX" + count=$(mptcp_lib_get_counter "$ns1" "MPTcpExtMPPrioRx") + if [ -z "$count" ]; then + test_skip + elif [ $count != 1 ]; then + test_fail "Count != 1: ${count}" + else + test_pass + fi +} + +verify_listener_events() +{ + if mptcp_lib_verify_listener_events "${@}"; then + test_pass + else + test_fail + fi +} + +test_listener() +{ + print_title "Listener tests" + + if ! mptcp_lib_kallsyms_has "mptcp_event_pm_listener$"; then + print_test "LISTENER events" + test_skip + return + fi + + # Capture events on the network namespace running the client + :>$client_evts + + # Attempt to add a listener at 10.0.2.2:<subflow-port> + ip netns exec $ns2 ./pm_nl_ctl listen 10.0.2.2\ + $client4_port & + local listener_pid=$! + + sleep 0.5 + print_test "CREATE_LISTENER 10.0.2.2:$client4_port" + verify_listener_events $client_evts $LISTENER_CREATED $AF_INET 10.0.2.2 $client4_port + + # ADD_ADDR from client to server machine reusing the subflow port + ip netns exec $ns2 ./pm_nl_ctl ann 10.0.2.2 token $client4_token id\ + $client_addr_id + sleep 0.5 + + # CREATE_SUBFLOW from server to client machine + ip netns exec $ns1 ./pm_nl_ctl csf lip 10.0.2.1 lid 23 rip 10.0.2.2\ + rport $client4_port token $server4_token + sleep 0.5 + + # Delete the listener from the client ns, if one was created + mptcp_lib_kill_wait $listener_pid + + sleep 0.5 + print_test "CLOSE_LISTENER 10.0.2.2:$client4_port" + verify_listener_events $client_evts $LISTENER_CLOSED $AF_INET 10.0.2.2 $client4_port +} + +print_title "Make connections" +make_connection +make_connection "v6" + +test_announce +test_remove +test_subflows +test_subflows_v4_v6_mix +test_prio +test_listener + +mptcp_lib_result_print_all_tap +exit ${ret} |