diff options
Diffstat (limited to 'tools/testing/selftests/net')
-rw-r--r-- | tools/testing/selftests/net/.gitignore | 4 | ||||
-rw-r--r-- | tools/testing/selftests/net/Makefile | 5 | ||||
-rw-r--r-- | tools/testing/selftests/net/config | 2 | ||||
-rwxr-xr-x | tools/testing/selftests/net/fib-onlink-tests.sh | 48 | ||||
-rwxr-xr-x | tools/testing/selftests/net/icmp_redirect.sh | 485 | ||||
-rw-r--r-- | tools/testing/selftests/net/ipv6_flowlabel.c | 229 | ||||
-rwxr-xr-x | tools/testing/selftests/net/ipv6_flowlabel.sh | 21 | ||||
-rw-r--r-- | tools/testing/selftests/net/ipv6_flowlabel_mgr.c | 199 | ||||
-rwxr-xr-x | tools/testing/selftests/net/pmtu.sh | 17 | ||||
-rw-r--r-- | tools/testing/selftests/net/so_txtime.c | 296 | ||||
-rwxr-xr-x | tools/testing/selftests/net/so_txtime.sh | 31 | ||||
-rw-r--r-- | tools/testing/selftests/net/tcp_fastopen_backup_key.c | 336 | ||||
-rwxr-xr-x | tools/testing/selftests/net/tcp_fastopen_backup_key.sh | 55 |
13 files changed, 1713 insertions, 15 deletions
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 6f81130605d7..4ce0bc1612f5 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -17,3 +17,7 @@ tcp_inq tls txring_overwrite ip_defrag +so_txtime +flowlabel +flowlabel_mgr +tcp_fastopen_backup_key diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 1e6d14d2825c..9a275d932fd5 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -9,12 +9,15 @@ TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh \ TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh -TEST_PROGS += test_vxlan_fdb_changelink.sh +TEST_PROGS += test_vxlan_fdb_changelink.sh so_txtime.sh ipv6_flowlabel.sh +TEST_PROGS += tcp_fastopen_backup_key.sh TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag +TEST_GEN_FILES += so_txtime ipv6_flowlabel ipv6_flowlabel_mgr +TEST_GEN_FILES += tcp_fastopen_backup_key TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 474040448601..89f84b5118bf 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -25,3 +25,5 @@ CONFIG_NF_TABLES_IPV6=y CONFIG_NF_TABLES_IPV4=y CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV4=m +CONFIG_NET_SCH_FQ=m +CONFIG_NET_SCH_ETF=m diff --git a/tools/testing/selftests/net/fib-onlink-tests.sh b/tools/testing/selftests/net/fib-onlink-tests.sh index 864f865eee55..c287b90b8af8 100755 --- a/tools/testing/selftests/net/fib-onlink-tests.sh +++ b/tools/testing/selftests/net/fib-onlink-tests.sh @@ -4,6 +4,7 @@ # IPv4 and IPv6 onlink tests PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} +VERBOSE=0 # Network interfaces # - odd in current namespace; even in peer ns @@ -91,10 +92,10 @@ log_test() if [ ${rc} -eq ${expected} ]; then nsuccess=$((nsuccess+1)) - printf "\n TEST: %-50s [ OK ]\n" "${msg}" + printf " TEST: %-50s [ OK ]\n" "${msg}" else nfail=$((nfail+1)) - printf "\n TEST: %-50s [FAIL]\n" "${msg}" + printf " TEST: %-50s [FAIL]\n" "${msg}" if [ "${PAUSE_ON_FAIL}" = "yes" ]; then echo echo "hit enter to continue, 'q' to quit" @@ -121,9 +122,23 @@ log_subsection() run_cmd() { - echo - echo "COMMAND: $*" - eval $* + local cmd="$*" + local out + local rc + + if [ "$VERBOSE" = "1" ]; then + printf " COMMAND: $cmd\n" + fi + + out=$(eval $cmd 2>&1) + rc=$? + if [ "$VERBOSE" = "1" -a -n "$out" ]; then + echo " $out" + fi + + [ "$VERBOSE" = "1" ] && echo + + return $rc } get_linklocal() @@ -451,11 +466,34 @@ run_onlink_tests() } ################################################################################ +# usage + +usage() +{ + cat <<EOF +usage: ${0##*/} OPTS + + -p Pause on fail + -v verbose mode (show commands and output) +EOF +} + +################################################################################ # main nsuccess=0 nfail=0 +while getopts :t:pPhv o +do + case $o in + p) PAUSE_ON_FAIL=yes;; + v) VERBOSE=$(($VERBOSE + 1));; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + cleanup setup run_onlink_tests diff --git a/tools/testing/selftests/net/icmp_redirect.sh b/tools/testing/selftests/net/icmp_redirect.sh new file mode 100755 index 000000000000..76a7c4472dc3 --- /dev/null +++ b/tools/testing/selftests/net/icmp_redirect.sh @@ -0,0 +1,485 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# redirect test +# +# .253 +----+ +# +----| r1 | +# | +----+ +# +----+ | |.1 +# | h1 |--------------+ | 10.1.1.0/30 2001:db8:1::0/126 +# +----+ .1 | |.2 +# 172.16.1/24 | +----+ +----+ +# 2001:db8:16:1/64 +----| r2 |-------------------| h2 | +# .254 +----+ .254 .2 +----+ +# 172.16.2/24 +# 2001:db8:16:2/64 +# +# Route from h1 to h2 goes through r1, eth1 - connection between r1 and r2. +# Route on r1 changed to go to r2 via eth0. This causes a redirect to be sent +# from r1 to h1 telling h1 to use r2 when talking to h2. + +VERBOSE=0 +PAUSE_ON_FAIL=no + +H1_N1_IP=172.16.1.1 +R1_N1_IP=172.16.1.253 +R2_N1_IP=172.16.1.254 + +H1_N1_IP6=2001:db8:16:1::1 +R1_N1_IP6=2001:db8:16:1::253 +R2_N1_IP6=2001:db8:16:1::254 + +R1_R2_N1_IP=10.1.1.1 +R2_R1_N1_IP=10.1.1.2 + +R1_R2_N1_IP6=2001:db8:1::1 +R2_R1_N1_IP6=2001:db8:1::2 + +H2_N2=172.16.2.0/24 +H2_N2_6=2001:db8:16:2::/64 +H2_N2_IP=172.16.2.2 +R2_N2_IP=172.16.2.254 +H2_N2_IP6=2001:db8:16:2::2 +R2_N2_IP6=2001:db8:16:2::254 + +VRF=red +VRF_TABLE=1111 + +################################################################################ +# helpers + +log_section() +{ + echo + echo "###########################################################################" + echo "$*" + echo "###########################################################################" + echo +} + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + printf "TEST: %-60s [ OK ]\n" "${msg}" + nsuccess=$((nsuccess+1)) + else + ret=1 + nfail=$((nfail+1)) + printf "TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +log_debug() +{ + if [ "$VERBOSE" = "1" ]; then + echo "$*" + fi +} + +run_cmd() +{ + local cmd="$*" + local out + local rc + + if [ "$VERBOSE" = "1" ]; then + echo "COMMAND: $cmd" + fi + + out=$(eval $cmd 2>&1) + rc=$? + if [ "$VERBOSE" = "1" -a -n "$out" ]; then + echo "$out" + fi + + [ "$VERBOSE" = "1" ] && echo + + return $rc +} + +get_linklocal() +{ + local ns=$1 + local dev=$2 + local addr + + addr=$(ip -netns $ns -6 -br addr show dev ${dev} | \ + awk '{ + for (i = 3; i <= NF; ++i) { + if ($i ~ /^fe80/) + print $i + } + }' + ) + addr=${addr/\/*} + + [ -z "$addr" ] && return 1 + + echo $addr + + return 0 +} + +################################################################################ +# setup and teardown + +cleanup() +{ + local ns + + for ns in h1 h2 r1 r2; do + ip netns del $ns 2>/dev/null + done +} + +create_vrf() +{ + local ns=$1 + + ip -netns ${ns} link add ${VRF} type vrf table ${VRF_TABLE} + ip -netns ${ns} link set ${VRF} up + ip -netns ${ns} route add vrf ${VRF} unreachable default metric 8192 + ip -netns ${ns} -6 route add vrf ${VRF} unreachable default metric 8192 + + ip -netns ${ns} addr add 127.0.0.1/8 dev ${VRF} + ip -netns ${ns} -6 addr add ::1 dev ${VRF} nodad + + ip -netns ${ns} ru del pref 0 + ip -netns ${ns} ru add pref 32765 from all lookup local + ip -netns ${ns} -6 ru del pref 0 + ip -netns ${ns} -6 ru add pref 32765 from all lookup local +} + +setup() +{ + local ns + + # + # create nodes as namespaces + # + for ns in h1 h2 r1 r2; do + ip netns add $ns + ip -netns $ns li set lo up + + case "${ns}" in + h[12]) ip netns exec $ns sysctl -q -w net.ipv4.conf.all.accept_redirects=1 + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0 + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.accept_redirects=1 + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1 + ;; + r[12]) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1 + ip netns exec $ns sysctl -q -w net.ipv4.conf.all.send_redirects=1 + + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1 + ip netns exec $ns sysctl -q -w net.ipv6.route.mtu_expires=10 + esac + done + + # + # create interconnects + # + ip -netns h1 li add eth0 type veth peer name r1h1 + ip -netns h1 li set r1h1 netns r1 name eth0 up + + ip -netns h1 li add eth1 type veth peer name r2h1 + ip -netns h1 li set r2h1 netns r2 name eth0 up + + ip -netns h2 li add eth0 type veth peer name r2h2 + ip -netns h2 li set eth0 up + ip -netns h2 li set r2h2 netns r2 name eth2 up + + ip -netns r1 li add eth1 type veth peer name r2r1 + ip -netns r1 li set eth1 up + ip -netns r1 li set r2r1 netns r2 name eth1 up + + # + # h1 + # + if [ "${WITH_VRF}" = "yes" ]; then + create_vrf "h1" + H1_VRF_ARG="vrf ${VRF}" + H1_PING_ARG="-I ${VRF}" + else + H1_VRF_ARG= + H1_PING_ARG= + fi + ip -netns h1 li add br0 type bridge + if [ "${WITH_VRF}" = "yes" ]; then + ip -netns h1 li set br0 vrf ${VRF} up + else + ip -netns h1 li set br0 up + fi + ip -netns h1 addr add dev br0 ${H1_N1_IP}/24 + ip -netns h1 -6 addr add dev br0 ${H1_N1_IP6}/64 nodad + ip -netns h1 li set eth0 master br0 up + ip -netns h1 li set eth1 master br0 up + + # + # h2 + # + ip -netns h2 addr add dev eth0 ${H2_N2_IP}/24 + ip -netns h2 ro add default via ${R2_N2_IP} dev eth0 + ip -netns h2 -6 addr add dev eth0 ${H2_N2_IP6}/64 nodad + ip -netns h2 -6 ro add default via ${R2_N2_IP6} dev eth0 + + # + # r1 + # + ip -netns r1 addr add dev eth0 ${R1_N1_IP}/24 + ip -netns r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad + ip -netns r1 addr add dev eth1 ${R1_R2_N1_IP}/30 + ip -netns r1 -6 addr add dev eth1 ${R1_R2_N1_IP6}/126 nodad + + # + # r2 + # + ip -netns r2 addr add dev eth0 ${R2_N1_IP}/24 + ip -netns r2 -6 addr add dev eth0 ${R2_N1_IP6}/64 nodad + ip -netns r2 addr add dev eth1 ${R2_R1_N1_IP}/30 + ip -netns r2 -6 addr add dev eth1 ${R2_R1_N1_IP6}/126 nodad + ip -netns r2 addr add dev eth2 ${R2_N2_IP}/24 + ip -netns r2 -6 addr add dev eth2 ${R2_N2_IP6}/64 nodad + + sleep 2 + + R1_LLADDR=$(get_linklocal r1 eth0) + if [ $? -ne 0 ]; then + echo "Error: Failed to get link-local address of r1's eth0" + exit 1 + fi + log_debug "initial gateway is R1's lladdr = ${R1_LLADDR}" + + R2_LLADDR=$(get_linklocal r2 eth0) + if [ $? -ne 0 ]; then + echo "Error: Failed to get link-local address of r2's eth0" + exit 1 + fi + log_debug "initial gateway is R2's lladdr = ${R2_LLADDR}" +} + +change_h2_mtu() +{ + local mtu=$1 + + run_cmd ip -netns h2 li set eth0 mtu ${mtu} + run_cmd ip -netns r2 li set eth2 mtu ${mtu} +} + +check_exception() +{ + local mtu="$1" + local with_redirect="$2" + local desc="$3" + + # From 172.16.1.101: icmp_seq=1 Redirect Host(New nexthop: 172.16.1.102) + if [ "$VERBOSE" = "1" ]; then + echo "Commands to check for exception:" + run_cmd ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} + run_cmd ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} + fi + + if [ -n "${mtu}" ]; then + mtu=" mtu ${mtu}" + fi + if [ "$with_redirect" = "yes" ]; then + ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \ + grep -q "cache <redirected> expires [0-9]*sec${mtu}" + elif [ -n "${mtu}" ]; then + ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \ + grep -q "cache expires [0-9]*sec${mtu}" + else + # want to verify that neither mtu nor redirected appears in + # the route get output. The -v will wipe out the cache line + # if either are set so the last grep -q will not find a match + ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \ + grep -E -v 'mtu|redirected' | grep -q "cache" + fi + log_test $? 0 "IPv4: ${desc}" + + if [ "$with_redirect" = "yes" ]; then + ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \ + grep -q "${H2_N2_IP6} from :: via ${R2_LLADDR} dev br0.*${mtu}" + elif [ -n "${mtu}" ]; then + ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \ + grep -q "${mtu}" + else + # IPv6 is a bit harder. First strip out the match if it + # contains an mtu exception and then look for the first + # gateway - R1's lladdr + ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \ + grep -v "mtu" | grep -q "${R1_LLADDR}" + fi + log_test $? 0 "IPv6: ${desc}" +} + +run_ping() +{ + local sz=$1 + + run_cmd ip netns exec h1 ping -q -M want -i 0.5 -c 10 -w 2 -s ${sz} ${H1_PING_ARG} ${H2_N2_IP} + run_cmd ip netns exec h1 ${ping6} -q -M want -i 0.5 -c 10 -w 2 -s ${sz} ${H1_PING_ARG} ${H2_N2_IP6} +} + +replace_route_legacy() +{ + # r1 to h2 via r2 and eth0 + run_cmd ip -netns r1 ro replace ${H2_N2} via ${R2_N1_IP} dev eth0 + run_cmd ip -netns r1 -6 ro replace ${H2_N2_6} via ${R2_LLADDR} dev eth0 +} + +reset_route_legacy() +{ + run_cmd ip -netns r1 ro del ${H2_N2} + run_cmd ip -netns r1 -6 ro del ${H2_N2_6} + + run_cmd ip -netns h1 ro del ${H1_VRF_ARG} ${H2_N2} + run_cmd ip -netns h1 -6 ro del ${H1_VRF_ARG} ${H2_N2_6} + + initial_route_legacy +} + +initial_route_legacy() +{ + # r1 to h2 via r2 and eth1 + run_cmd ip -netns r1 ro add ${H2_N2} via ${R2_R1_N1_IP} dev eth1 + run_cmd ip -netns r1 -6 ro add ${H2_N2_6} via ${R2_R1_N1_IP6} dev eth1 + + # h1 to h2 via r1 + # - IPv6 redirect only works if gateway is the LLA + run_cmd ip -netns h1 ro add ${H1_VRF_ARG} ${H2_N2} via ${R1_N1_IP} dev br0 + run_cmd ip -netns h1 -6 ro add ${H1_VRF_ARG} ${H2_N2_6} via ${R1_LLADDR} dev br0 +} + +check_connectivity() +{ + local rc + + run_cmd ip netns exec h1 ping -c1 -w1 ${H1_PING_ARG} ${H2_N2_IP} + rc=$? + run_cmd ip netns exec h1 ${ping6} -c1 -w1 ${H1_PING_ARG} ${H2_N2_IP6} + [ $? -ne 0 ] && rc=$? + + return $rc +} + +do_test() +{ + local ttype="$1" + + eval initial_route_${ttype} + + # verify connectivity + check_connectivity + if [ $? -ne 0 ]; then + echo "Error: Basic connectivity is broken" + ret=1 + return + fi + + # redirect exception followed by mtu + eval replace_route_${ttype} + run_ping 64 + check_exception "" "yes" "redirect exception" + + check_connectivity + if [ $? -ne 0 ]; then + echo "Error: Basic connectivity is broken after redirect" + ret=1 + return + fi + + change_h2_mtu 1300 + run_ping 1350 + check_exception "1300" "yes" "redirect exception plus mtu" + + # remove exceptions and restore routing + change_h2_mtu 1500 + eval reset_route_${ttype} + + check_connectivity + if [ $? -ne 0 ]; then + echo "Error: Basic connectivity is broken after reset" + ret=1 + return + fi + check_exception "" "no" "routing reset" + + # MTU exception followed by redirect + change_h2_mtu 1300 + run_ping 1350 + check_exception "1300" "no" "mtu exception" + + eval replace_route_${ttype} + run_ping 64 + check_exception "1300" "yes" "mtu exception plus redirect" + + check_connectivity + if [ $? -ne 0 ]; then + echo "Error: Basic connectivity is broken after redirect" + ret=1 + return + fi +} + +################################################################################ +# usage + +usage() +{ + cat <<EOF +usage: ${0##*/} OPTS + + -p Pause on fail + -v verbose mode (show commands and output) +EOF +} + +################################################################################ +# main + +# Some systems don't have a ping6 binary anymore +which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping) + +ret=0 +nsuccess=0 +nfail=0 + +while getopts :pv o +do + case $o in + p) PAUSE_ON_FAIL=yes;; + v) VERBOSE=$(($VERBOSE + 1));; + *) usage; exit 1;; + esac +done + +trap cleanup EXIT + +cleanup +WITH_VRF=no +setup + +log_section "Legacy routing" +do_test "legacy" + +cleanup +log_section "Legacy routing with VRF" +WITH_VRF=yes +setup +do_test "legacy" + +printf "\nTests passed: %3d\n" ${nsuccess} +printf "Tests failed: %3d\n" ${nfail} + +exit $ret diff --git a/tools/testing/selftests/net/ipv6_flowlabel.c b/tools/testing/selftests/net/ipv6_flowlabel.c new file mode 100644 index 000000000000..a7c41375374f --- /dev/null +++ b/tools/testing/selftests/net/ipv6_flowlabel.c @@ -0,0 +1,229 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Test IPV6_FLOWINFO cmsg on send and recv */ + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <asm/byteorder.h> +#include <error.h> +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <linux/in6.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/types.h> +#include <unistd.h> + +/* uapi/glibc weirdness may leave this undefined */ +#ifndef IPV6_FLOWINFO +#define IPV6_FLOWINFO 11 +#endif + +#ifndef IPV6_FLOWLABEL_MGR +#define IPV6_FLOWLABEL_MGR 32 +#endif + +#define FLOWLABEL_WILDCARD ((uint32_t) -1) + +static const char cfg_data[] = "a"; +static uint32_t cfg_label = 1; + +static void do_send(int fd, bool with_flowlabel, uint32_t flowlabel) +{ + char control[CMSG_SPACE(sizeof(flowlabel))] = {0}; + struct msghdr msg = {0}; + struct iovec iov = {0}; + int ret; + + iov.iov_base = (char *)cfg_data; + iov.iov_len = sizeof(cfg_data); + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + if (with_flowlabel) { + struct cmsghdr *cm; + + cm = (void *)control; + cm->cmsg_len = CMSG_LEN(sizeof(flowlabel)); + cm->cmsg_level = SOL_IPV6; + cm->cmsg_type = IPV6_FLOWINFO; + *(uint32_t *)CMSG_DATA(cm) = htonl(flowlabel); + + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + } + + ret = sendmsg(fd, &msg, 0); + if (ret == -1) + error(1, errno, "send"); + + if (with_flowlabel) + fprintf(stderr, "sent with label %u\n", flowlabel); + else + fprintf(stderr, "sent without label\n"); +} + +static void do_recv(int fd, bool with_flowlabel, uint32_t expect) +{ + char control[CMSG_SPACE(sizeof(expect))]; + char data[sizeof(cfg_data)]; + struct msghdr msg = {0}; + struct iovec iov = {0}; + struct cmsghdr *cm; + uint32_t flowlabel; + int ret; + + iov.iov_base = data; + iov.iov_len = sizeof(data); + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + memset(control, 0, sizeof(control)); + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + ret = recvmsg(fd, &msg, 0); + if (ret == -1) + error(1, errno, "recv"); + if (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) + error(1, 0, "recv: truncated"); + if (ret != sizeof(cfg_data)) + error(1, 0, "recv: length mismatch"); + if (memcmp(data, cfg_data, sizeof(data))) + error(1, 0, "recv: data mismatch"); + + cm = CMSG_FIRSTHDR(&msg); + if (with_flowlabel) { + if (!cm) + error(1, 0, "recv: missing cmsg"); + if (CMSG_NXTHDR(&msg, cm)) + error(1, 0, "recv: too many cmsg"); + if (cm->cmsg_level != SOL_IPV6 || + cm->cmsg_type != IPV6_FLOWINFO) + error(1, 0, "recv: unexpected cmsg level or type"); + + flowlabel = ntohl(*(uint32_t *)CMSG_DATA(cm)); + fprintf(stderr, "recv with label %u\n", flowlabel); + + if (expect != FLOWLABEL_WILDCARD && expect != flowlabel) + fprintf(stderr, "recv: incorrect flowlabel %u != %u\n", + flowlabel, expect); + + } else { + fprintf(stderr, "recv without label\n"); + } +} + +static bool get_autoflowlabel_enabled(void) +{ + int fd, ret; + char val; + + fd = open("/proc/sys/net/ipv6/auto_flowlabels", O_RDONLY); + if (fd == -1) + error(1, errno, "open sysctl"); + + ret = read(fd, &val, 1); + if (ret == -1) + error(1, errno, "read sysctl"); + if (ret == 0) + error(1, 0, "read sysctl: 0"); + + if (close(fd)) + error(1, errno, "close sysctl"); + + return val == '1'; +} + +static void flowlabel_get(int fd, uint32_t label, uint8_t share, uint16_t flags) +{ + struct in6_flowlabel_req req = { + .flr_action = IPV6_FL_A_GET, + .flr_label = htonl(label), + .flr_flags = flags, + .flr_share = share, + }; + + /* do not pass IPV6_ADDR_ANY or IPV6_ADDR_MAPPED */ + req.flr_dst.s6_addr[0] = 0xfd; + req.flr_dst.s6_addr[15] = 0x1; + + if (setsockopt(fd, SOL_IPV6, IPV6_FLOWLABEL_MGR, &req, sizeof(req))) + error(1, errno, "setsockopt flowlabel get"); +} + +static void parse_opts(int argc, char **argv) +{ + int c; + + while ((c = getopt(argc, argv, "l:")) != -1) { + switch (c) { + case 'l': + cfg_label = strtoul(optarg, NULL, 0); + break; + default: + error(1, 0, "%s: parse error", argv[0]); + } + } +} + +int main(int argc, char **argv) +{ + struct sockaddr_in6 addr = { + .sin6_family = AF_INET6, + .sin6_port = htons(8000), + .sin6_addr = IN6ADDR_LOOPBACK_INIT, + }; + const int one = 1; + int fdt, fdr; + + parse_opts(argc, argv); + + fdt = socket(PF_INET6, SOCK_DGRAM, 0); + if (fdt == -1) + error(1, errno, "socket t"); + + fdr = socket(PF_INET6, SOCK_DGRAM, 0); + if (fdr == -1) + error(1, errno, "socket r"); + + if (connect(fdt, (void *)&addr, sizeof(addr))) + error(1, errno, "connect"); + if (bind(fdr, (void *)&addr, sizeof(addr))) + error(1, errno, "bind"); + + flowlabel_get(fdt, cfg_label, IPV6_FL_S_EXCL, IPV6_FL_F_CREATE); + + if (setsockopt(fdr, SOL_IPV6, IPV6_FLOWINFO, &one, sizeof(one))) + error(1, errno, "setsockopt flowinfo"); + + if (get_autoflowlabel_enabled()) { + fprintf(stderr, "send no label: recv auto flowlabel\n"); + do_send(fdt, false, 0); + do_recv(fdr, true, FLOWLABEL_WILDCARD); + } else { + fprintf(stderr, "send no label: recv no label (auto off)\n"); + do_send(fdt, false, 0); + do_recv(fdr, false, 0); + } + + fprintf(stderr, "send label\n"); + do_send(fdt, true, cfg_label); + do_recv(fdr, true, cfg_label); + + if (close(fdr)) + error(1, errno, "close r"); + if (close(fdt)) + error(1, errno, "close t"); + + return 0; +} diff --git a/tools/testing/selftests/net/ipv6_flowlabel.sh b/tools/testing/selftests/net/ipv6_flowlabel.sh new file mode 100755 index 000000000000..d3bc6442704e --- /dev/null +++ b/tools/testing/selftests/net/ipv6_flowlabel.sh @@ -0,0 +1,21 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Regression tests for IPv6 flowlabels +# +# run in separate namespaces to avoid mgmt db conflicts betweent tests + +set -e + +echo "TEST management" +./in_netns.sh ./ipv6_flowlabel_mgr + +echo "TEST datapath" +./in_netns.sh \ + sh -c 'sysctl -q -w net.ipv6.auto_flowlabels=0 && ./ipv6_flowlabel -l 1' + +echo "TEST datapath (with auto-flowlabels)" +./in_netns.sh \ + sh -c 'sysctl -q -w net.ipv6.auto_flowlabels=1 && ./ipv6_flowlabel -l 1' + +echo OK. All tests passed diff --git a/tools/testing/selftests/net/ipv6_flowlabel_mgr.c b/tools/testing/selftests/net/ipv6_flowlabel_mgr.c new file mode 100644 index 000000000000..af95b48acea9 --- /dev/null +++ b/tools/testing/selftests/net/ipv6_flowlabel_mgr.c @@ -0,0 +1,199 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Test IPV6_FLOWINFO_MGR */ + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <error.h> +#include <errno.h> +#include <limits.h> +#include <linux/in6.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +/* uapi/glibc weirdness may leave this undefined */ +#ifndef IPV6_FLOWLABEL_MGR +#define IPV6_FLOWLABEL_MGR 32 +#endif + +/* from net/ipv6/ip6_flowlabel.c */ +#define FL_MIN_LINGER 6 + +#define explain(x) \ + do { if (cfg_verbose) fprintf(stderr, " " x "\n"); } while (0) + +#define __expect(x) \ + do { \ + if (!(x)) \ + fprintf(stderr, "[OK] " #x "\n"); \ + else \ + error(1, 0, "[ERR] " #x " (line %d)", __LINE__); \ + } while (0) + +#define expect_pass(x) __expect(x) +#define expect_fail(x) __expect(!(x)) + +static bool cfg_long_running; +static bool cfg_verbose; + +static int flowlabel_get(int fd, uint32_t label, uint8_t share, uint16_t flags) +{ + struct in6_flowlabel_req req = { + .flr_action = IPV6_FL_A_GET, + .flr_label = htonl(label), + .flr_flags = flags, + .flr_share = share, + }; + + /* do not pass IPV6_ADDR_ANY or IPV6_ADDR_MAPPED */ + req.flr_dst.s6_addr[0] = 0xfd; + req.flr_dst.s6_addr[15] = 0x1; + + return setsockopt(fd, SOL_IPV6, IPV6_FLOWLABEL_MGR, &req, sizeof(req)); +} + +static int flowlabel_put(int fd, uint32_t label) +{ + struct in6_flowlabel_req req = { + .flr_action = IPV6_FL_A_PUT, + .flr_label = htonl(label), + }; + + return setsockopt(fd, SOL_IPV6, IPV6_FLOWLABEL_MGR, &req, sizeof(req)); +} + +static void run_tests(int fd) +{ + int wstatus; + pid_t pid; + + explain("cannot get non-existent label"); + expect_fail(flowlabel_get(fd, 1, IPV6_FL_S_ANY, 0)); + + explain("cannot put non-existent label"); + expect_fail(flowlabel_put(fd, 1)); + + explain("cannot create label greater than 20 bits"); + expect_fail(flowlabel_get(fd, 0x1FFFFF, IPV6_FL_S_ANY, + IPV6_FL_F_CREATE)); + + explain("create a new label (FL_F_CREATE)"); + expect_pass(flowlabel_get(fd, 1, IPV6_FL_S_ANY, IPV6_FL_F_CREATE)); + explain("can get the label (without FL_F_CREATE)"); + expect_pass(flowlabel_get(fd, 1, IPV6_FL_S_ANY, 0)); + explain("can get it again with create flag set, too"); + expect_pass(flowlabel_get(fd, 1, IPV6_FL_S_ANY, IPV6_FL_F_CREATE)); + explain("cannot get it again with the exclusive (FL_FL_EXCL) flag"); + expect_fail(flowlabel_get(fd, 1, IPV6_FL_S_ANY, + IPV6_FL_F_CREATE | IPV6_FL_F_EXCL)); + explain("can now put exactly three references"); + expect_pass(flowlabel_put(fd, 1)); + expect_pass(flowlabel_put(fd, 1)); + expect_pass(flowlabel_put(fd, 1)); + expect_fail(flowlabel_put(fd, 1)); + + explain("create a new exclusive label (FL_S_EXCL)"); + expect_pass(flowlabel_get(fd, 2, IPV6_FL_S_EXCL, IPV6_FL_F_CREATE)); + explain("cannot get it again in non-exclusive mode"); + expect_fail(flowlabel_get(fd, 2, IPV6_FL_S_ANY, IPV6_FL_F_CREATE)); + explain("cannot get it again in exclusive mode either"); + expect_fail(flowlabel_get(fd, 2, IPV6_FL_S_EXCL, IPV6_FL_F_CREATE)); + expect_pass(flowlabel_put(fd, 2)); + + if (cfg_long_running) { + explain("cannot reuse the label, due to linger"); + expect_fail(flowlabel_get(fd, 2, IPV6_FL_S_ANY, + IPV6_FL_F_CREATE)); + explain("after sleep, can reuse"); + sleep(FL_MIN_LINGER * 2 + 1); + expect_pass(flowlabel_get(fd, 2, IPV6_FL_S_ANY, + IPV6_FL_F_CREATE)); + } + + explain("create a new user-private label (FL_S_USER)"); + expect_pass(flowlabel_get(fd, 3, IPV6_FL_S_USER, IPV6_FL_F_CREATE)); + explain("cannot get it again in non-exclusive mode"); + expect_fail(flowlabel_get(fd, 3, IPV6_FL_S_ANY, 0)); + explain("cannot get it again in exclusive mode"); + expect_fail(flowlabel_get(fd, 3, IPV6_FL_S_EXCL, 0)); + explain("can get it again in user mode"); + expect_pass(flowlabel_get(fd, 3, IPV6_FL_S_USER, 0)); + explain("child process can get it too, but not after setuid(nobody)"); + pid = fork(); + if (pid == -1) + error(1, errno, "fork"); + if (!pid) { + expect_pass(flowlabel_get(fd, 3, IPV6_FL_S_USER, 0)); + if (setuid(USHRT_MAX)) + fprintf(stderr, "[INFO] skip setuid child test\n"); + else + expect_fail(flowlabel_get(fd, 3, IPV6_FL_S_USER, 0)); + exit(0); + } + if (wait(&wstatus) == -1) + error(1, errno, "wait"); + if (!WIFEXITED(wstatus) || WEXITSTATUS(wstatus) != 0) + error(1, errno, "wait: unexpected child result"); + + explain("create a new process-private label (FL_S_PROCESS)"); + expect_pass(flowlabel_get(fd, 4, IPV6_FL_S_PROCESS, IPV6_FL_F_CREATE)); + explain("can get it again"); + expect_pass(flowlabel_get(fd, 4, IPV6_FL_S_PROCESS, 0)); + explain("child process cannot can get it"); + pid = fork(); + if (pid == -1) + error(1, errno, "fork"); + if (!pid) { + expect_fail(flowlabel_get(fd, 4, IPV6_FL_S_PROCESS, 0)); + exit(0); + } + if (wait(&wstatus) == -1) + error(1, errno, "wait"); + if (!WIFEXITED(wstatus) || WEXITSTATUS(wstatus) != 0) + error(1, errno, "wait: unexpected child result"); +} + +static void parse_opts(int argc, char **argv) +{ + int c; + + while ((c = getopt(argc, argv, "lv")) != -1) { + switch (c) { + case 'l': + cfg_long_running = true; + break; + case 'v': + cfg_verbose = true; + break; + default: + error(1, 0, "%s: parse error", argv[0]); + } + } +} + +int main(int argc, char **argv) +{ + int fd; + + parse_opts(argc, argv); + + fd = socket(PF_INET6, SOCK_DGRAM, 0); + if (fd == -1) + error(1, errno, "socket"); + + run_tests(fd); + + if (close(fd)) + error(1, errno, "close"); + + return 0; +} diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 317dafcd605d..4a1275990d7e 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -152,10 +152,10 @@ tests=" cleanup_ipv4_exception ipv4: cleanup of cached exceptions cleanup_ipv6_exception ipv6: cleanup of cached exceptions" -NS_A="ns-$(mktemp -u XXXXXX)" -NS_B="ns-$(mktemp -u XXXXXX)" -NS_R1="ns-$(mktemp -u XXXXXX)" -NS_R2="ns-$(mktemp -u XXXXXX)" +NS_A="ns-A" +NS_B="ns-B" +NS_R1="ns-R1" +NS_R2="ns-R2" ns_a="ip netns exec ${NS_A}" ns_b="ip netns exec ${NS_B}" ns_r1="ip netns exec ${NS_R1}" @@ -212,7 +212,6 @@ dummy6_0_prefix="fc00:1000::" dummy6_1_prefix="fc00:1001::" dummy6_mask="64" -cleanup_done=1 err_buf= tcpdump_pids= @@ -495,7 +494,7 @@ setup_routing() { setup() { [ "$(id -u)" -ne 0 ] && echo " need to run as root" && return $ksft_skip - cleanup_done=0 + cleanup for arg do eval setup_${arg} || { echo " ${arg} not supported"; return 1; } done @@ -519,11 +518,9 @@ cleanup() { done tcpdump_pids= - [ ${cleanup_done} -eq 1 ] && return for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do ip netns del ${n} 2> /dev/null done - cleanup_done=1 } mtu() { @@ -1136,6 +1133,9 @@ done trap cleanup EXIT +# start clean +cleanup + for t in ${tests}; do [ $desc -eq 0 ] && name="${t}" && desc=1 && continue || desc=0 @@ -1156,7 +1156,6 @@ for t in ${tests}; do eval test_${name} ret=$? - cleanup if [ $ret -eq 0 ]; then printf "TEST: %-60s [ OK ]\n" "${t}" diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c new file mode 100644 index 000000000000..53f598f06647 --- /dev/null +++ b/tools/testing/selftests/net/so_txtime.c @@ -0,0 +1,296 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test the SO_TXTIME API + * + * Takes two streams of { payload, delivery time }[], one input and one output. + * Sends the input stream and verifies arrival matches the output stream. + * The two streams can differ due to out-of-order delivery and drops. + */ + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <error.h> +#include <errno.h> +#include <linux/net_tstamp.h> +#include <stdbool.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/types.h> +#include <time.h> +#include <unistd.h> + +static int cfg_clockid = CLOCK_TAI; +static bool cfg_do_ipv4; +static bool cfg_do_ipv6; +static uint16_t cfg_port = 8000; +static int cfg_variance_us = 2000; + +static uint64_t glob_tstart; + +/* encode one timed transmission (of a 1B payload) */ +struct timed_send { + char data; + int64_t delay_us; +}; + +#define MAX_NUM_PKT 8 +static struct timed_send cfg_in[MAX_NUM_PKT]; +static struct timed_send cfg_out[MAX_NUM_PKT]; +static int cfg_num_pkt; + +static uint64_t gettime_ns(void) +{ + struct timespec ts; + + if (clock_gettime(cfg_clockid, &ts)) + error(1, errno, "gettime"); + + return ts.tv_sec * (1000ULL * 1000 * 1000) + ts.tv_nsec; +} + +static void do_send_one(int fdt, struct timed_send *ts) +{ + char control[CMSG_SPACE(sizeof(uint64_t))]; + struct msghdr msg = {0}; + struct iovec iov = {0}; + struct cmsghdr *cm; + uint64_t tdeliver; + int ret; + + iov.iov_base = &ts->data; + iov.iov_len = 1; + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + if (ts->delay_us >= 0) { + memset(control, 0, sizeof(control)); + msg.msg_control = &control; + msg.msg_controllen = sizeof(control); + + tdeliver = glob_tstart + ts->delay_us * 1000; + + cm = CMSG_FIRSTHDR(&msg); + cm->cmsg_level = SOL_SOCKET; + cm->cmsg_type = SCM_TXTIME; + cm->cmsg_len = CMSG_LEN(sizeof(tdeliver)); + memcpy(CMSG_DATA(cm), &tdeliver, sizeof(tdeliver)); + } + + ret = sendmsg(fdt, &msg, 0); + if (ret == -1) + error(1, errno, "write"); + if (ret == 0) + error(1, 0, "write: 0B"); + +} + +static void do_recv_one(int fdr, struct timed_send *ts) +{ + int64_t tstop, texpect; + char rbuf[2]; + int ret; + + ret = recv(fdr, rbuf, sizeof(rbuf), 0); + if (ret == -1) + error(1, errno, "read"); + if (ret != 1) + error(1, 0, "read: %dB", ret); + + tstop = (gettime_ns() - glob_tstart) / 1000; + texpect = ts->delay_us >= 0 ? ts->delay_us : 0; + + fprintf(stderr, "payload:%c delay:%ld expected:%ld (us)\n", + rbuf[0], tstop, texpect); + + if (rbuf[0] != ts->data) + error(1, 0, "payload mismatch. expected %c", ts->data); + + if (labs(tstop - texpect) > cfg_variance_us) + error(1, 0, "exceeds variance (%d us)", cfg_variance_us); +} + +static void do_recv_verify_empty(int fdr) +{ + char rbuf[1]; + int ret; + + ret = recv(fdr, rbuf, sizeof(rbuf), 0); + if (ret != -1 || errno != EAGAIN) + error(1, 0, "recv: not empty as expected (%d, %d)", ret, errno); +} + +static void setsockopt_txtime(int fd) +{ + struct sock_txtime so_txtime_val = { .clockid = cfg_clockid }; + struct sock_txtime so_txtime_val_read = { 0 }; + socklen_t vallen = sizeof(so_txtime_val); + + if (setsockopt(fd, SOL_SOCKET, SO_TXTIME, + &so_txtime_val, sizeof(so_txtime_val))) + error(1, errno, "setsockopt txtime"); + + if (getsockopt(fd, SOL_SOCKET, SO_TXTIME, + &so_txtime_val_read, &vallen)) + error(1, errno, "getsockopt txtime"); + + if (vallen != sizeof(so_txtime_val) || + memcmp(&so_txtime_val, &so_txtime_val_read, vallen)) + error(1, 0, "getsockopt txtime: mismatch"); +} + +static int setup_tx(struct sockaddr *addr, socklen_t alen) +{ + int fd; + + fd = socket(addr->sa_family, SOCK_DGRAM, 0); + if (fd == -1) + error(1, errno, "socket t"); + + if (connect(fd, addr, alen)) + error(1, errno, "connect"); + + setsockopt_txtime(fd); + + return fd; +} + +static int setup_rx(struct sockaddr *addr, socklen_t alen) +{ + struct timeval tv = { .tv_usec = 100 * 1000 }; + int fd; + + fd = socket(addr->sa_family, SOCK_DGRAM, 0); + if (fd == -1) + error(1, errno, "socket r"); + + if (bind(fd, addr, alen)) + error(1, errno, "bind"); + + if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv))) + error(1, errno, "setsockopt rcv timeout"); + + return fd; +} + +static void do_test(struct sockaddr *addr, socklen_t alen) +{ + int fdt, fdr, i; + + fprintf(stderr, "\nSO_TXTIME ipv%c clock %s\n", + addr->sa_family == PF_INET ? '4' : '6', + cfg_clockid == CLOCK_TAI ? "tai" : "monotonic"); + + fdt = setup_tx(addr, alen); + fdr = setup_rx(addr, alen); + + glob_tstart = gettime_ns(); + + for (i = 0; i < cfg_num_pkt; i++) + do_send_one(fdt, &cfg_in[i]); + for (i = 0; i < cfg_num_pkt; i++) + do_recv_one(fdr, &cfg_out[i]); + + do_recv_verify_empty(fdr); + + if (close(fdr)) + error(1, errno, "close r"); + if (close(fdt)) + error(1, errno, "close t"); +} + +static int parse_io(const char *optarg, struct timed_send *array) +{ + char *arg, *tok; + int aoff = 0; + + arg = strdup(optarg); + if (!arg) + error(1, errno, "strdup"); + + while ((tok = strtok(arg, ","))) { + arg = NULL; /* only pass non-zero on first call */ + + if (aoff / 2 == MAX_NUM_PKT) + error(1, 0, "exceeds max pkt count (%d)", MAX_NUM_PKT); + + if (aoff & 1) { /* parse delay */ + array->delay_us = strtol(tok, NULL, 0) * 1000; + array++; + } else { /* parse character */ + array->data = tok[0]; + } + + aoff++; + } + + free(arg); + + return aoff / 2; +} + +static void parse_opts(int argc, char **argv) +{ + int c, ilen, olen; + + while ((c = getopt(argc, argv, "46c:")) != -1) { + switch (c) { + case '4': + cfg_do_ipv4 = true; + break; + case '6': + cfg_do_ipv6 = true; + break; + case 'c': + if (!strcmp(optarg, "tai")) + cfg_clockid = CLOCK_TAI; + else if (!strcmp(optarg, "monotonic") || + !strcmp(optarg, "mono")) + cfg_clockid = CLOCK_MONOTONIC; + else + error(1, 0, "unknown clock id %s", optarg); + break; + default: + error(1, 0, "parse error at %d", optind); + } + } + + if (argc - optind != 2) + error(1, 0, "Usage: %s [-46] -c <clock> <in> <out>", argv[0]); + + ilen = parse_io(argv[optind], cfg_in); + olen = parse_io(argv[optind + 1], cfg_out); + if (ilen != olen) + error(1, 0, "i/o streams len mismatch (%d, %d)\n", ilen, olen); + cfg_num_pkt = ilen; +} + +int main(int argc, char **argv) +{ + parse_opts(argc, argv); + + if (cfg_do_ipv6) { + struct sockaddr_in6 addr6 = {0}; + + addr6.sin6_family = AF_INET6; + addr6.sin6_port = htons(cfg_port); + addr6.sin6_addr = in6addr_loopback; + do_test((void *)&addr6, sizeof(addr6)); + } + + if (cfg_do_ipv4) { + struct sockaddr_in addr4 = {0}; + + addr4.sin_family = AF_INET; + addr4.sin_port = htons(cfg_port); + addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + do_test((void *)&addr4, sizeof(addr4)); + } + + return 0; +} diff --git a/tools/testing/selftests/net/so_txtime.sh b/tools/testing/selftests/net/so_txtime.sh new file mode 100755 index 000000000000..5aa519328a5b --- /dev/null +++ b/tools/testing/selftests/net/so_txtime.sh @@ -0,0 +1,31 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Regression tests for the SO_TXTIME interface + +# Run in network namespace +if [[ $# -eq 0 ]]; then + ./in_netns.sh $0 __subprocess + exit $? +fi + +set -e + +tc qdisc add dev lo root fq +./so_txtime -4 -6 -c mono a,-1 a,-1 +./so_txtime -4 -6 -c mono a,0 a,0 +./so_txtime -4 -6 -c mono a,10 a,10 +./so_txtime -4 -6 -c mono a,10,b,20 a,10,b,20 +./so_txtime -4 -6 -c mono a,20,b,10 b,20,a,20 + +if tc qdisc replace dev lo root etf clockid CLOCK_TAI delta 200000; then + ! ./so_txtime -4 -6 -c tai a,-1 a,-1 + ! ./so_txtime -4 -6 -c tai a,0 a,0 + ./so_txtime -4 -6 -c tai a,10 a,10 + ./so_txtime -4 -6 -c tai a,10,b,20 a,10,b,20 + ./so_txtime -4 -6 -c tai a,20,b,10 b,10,a,20 +else + echo "tc ($(tc -V)) does not support qdisc etf. skipping" +fi + +echo OK. All tests passed diff --git a/tools/testing/selftests/net/tcp_fastopen_backup_key.c b/tools/testing/selftests/net/tcp_fastopen_backup_key.c new file mode 100644 index 000000000000..58bb77d9e7e1 --- /dev/null +++ b/tools/testing/selftests/net/tcp_fastopen_backup_key.c @@ -0,0 +1,336 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Test key rotation for TFO. + * New keys are 'rotated' in two steps: + * 1) Add new key as the 'backup' key 'behind' the primary key + * 2) Make new key the primary by swapping the backup and primary keys + * + * The rotation is done in stages using multiple sockets bound + * to the same port via SO_REUSEPORT. This simulates key rotation + * behind say a load balancer. We verify that across the rotation + * there are no cases in which a cookie is not accepted by verifying + * that TcpExtTCPFastOpenPassiveFail remains 0. + */ +#define _GNU_SOURCE +#include <arpa/inet.h> +#include <errno.h> +#include <error.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/epoll.h> +#include <unistd.h> +#include <netinet/tcp.h> +#include <fcntl.h> +#include <time.h> + +#ifndef TCP_FASTOPEN_KEY +#define TCP_FASTOPEN_KEY 33 +#endif + +#define N_LISTEN 10 +#define PROC_FASTOPEN_KEY "/proc/sys/net/ipv4/tcp_fastopen_key" +#define KEY_LENGTH 16 + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) +#endif + +static bool do_ipv6; +static bool do_sockopt; +static bool do_rotate; +static int key_len = KEY_LENGTH; +static int rcv_fds[N_LISTEN]; +static int proc_fd; +static const char *IP4_ADDR = "127.0.0.1"; +static const char *IP6_ADDR = "::1"; +static const int PORT = 8891; + +static void get_keys(int fd, uint32_t *keys) +{ + char buf[128]; + int len = KEY_LENGTH * 2; + + if (do_sockopt) { + if (getsockopt(fd, SOL_TCP, TCP_FASTOPEN_KEY, keys, &len)) + error(1, errno, "Unable to get key"); + return; + } + lseek(proc_fd, 0, SEEK_SET); + if (read(proc_fd, buf, sizeof(buf)) <= 0) + error(1, errno, "Unable to read %s", PROC_FASTOPEN_KEY); + if (sscanf(buf, "%x-%x-%x-%x,%x-%x-%x-%x", keys, keys + 1, keys + 2, + keys + 3, keys + 4, keys + 5, keys + 6, keys + 7) != 8) + error(1, 0, "Unable to parse %s", PROC_FASTOPEN_KEY); +} + +static void set_keys(int fd, uint32_t *keys) +{ + char buf[128]; + + if (do_sockopt) { + if (setsockopt(fd, SOL_TCP, TCP_FASTOPEN_KEY, keys, + key_len)) + error(1, errno, "Unable to set key"); + return; + } + if (do_rotate) + snprintf(buf, 128, "%08x-%08x-%08x-%08x,%08x-%08x-%08x-%08x", + keys[0], keys[1], keys[2], keys[3], keys[4], keys[5], + keys[6], keys[7]); + else + snprintf(buf, 128, "%08x-%08x-%08x-%08x", + keys[0], keys[1], keys[2], keys[3]); + lseek(proc_fd, 0, SEEK_SET); + if (write(proc_fd, buf, sizeof(buf)) <= 0) + error(1, errno, "Unable to write %s", PROC_FASTOPEN_KEY); +} + +static void build_rcv_fd(int family, int proto, int *rcv_fds) +{ + struct sockaddr_in addr4 = {0}; + struct sockaddr_in6 addr6 = {0}; + struct sockaddr *addr; + int opt = 1, i, sz; + int qlen = 100; + uint32_t keys[8]; + + switch (family) { + case AF_INET: + addr4.sin_family = family; + addr4.sin_addr.s_addr = htonl(INADDR_ANY); + addr4.sin_port = htons(PORT); + sz = sizeof(addr4); + addr = (struct sockaddr *)&addr4; + break; + case AF_INET6: + addr6.sin6_family = AF_INET6; + addr6.sin6_addr = in6addr_any; + addr6.sin6_port = htons(PORT); + sz = sizeof(addr6); + addr = (struct sockaddr *)&addr6; + break; + default: + error(1, 0, "Unsupported family %d", family); + /* clang does not recognize error() above as terminating + * the program, so it complains that saddr, sz are + * not initialized when this code path is taken. Silence it. + */ + return; + } + for (i = 0; i < ARRAY_SIZE(keys); i++) + keys[i] = rand(); + for (i = 0; i < N_LISTEN; i++) { + rcv_fds[i] = socket(family, proto, 0); + if (rcv_fds[i] < 0) + error(1, errno, "failed to create receive socket"); + if (setsockopt(rcv_fds[i], SOL_SOCKET, SO_REUSEPORT, &opt, + sizeof(opt))) + error(1, errno, "failed to set SO_REUSEPORT"); + if (bind(rcv_fds[i], addr, sz)) + error(1, errno, "failed to bind receive socket"); + if (setsockopt(rcv_fds[i], SOL_TCP, TCP_FASTOPEN, &qlen, + sizeof(qlen))) + error(1, errno, "failed to set TCP_FASTOPEN"); + set_keys(rcv_fds[i], keys); + if (proto == SOCK_STREAM && listen(rcv_fds[i], 10)) + error(1, errno, "failed to listen on receive port"); + } +} + +static int connect_and_send(int family, int proto) +{ + struct sockaddr_in saddr4 = {0}; + struct sockaddr_in daddr4 = {0}; + struct sockaddr_in6 saddr6 = {0}; + struct sockaddr_in6 daddr6 = {0}; + struct sockaddr *saddr, *daddr; + int fd, sz, ret; + char data[1]; + + switch (family) { + case AF_INET: + saddr4.sin_family = AF_INET; + saddr4.sin_addr.s_addr = htonl(INADDR_ANY); + saddr4.sin_port = 0; + + daddr4.sin_family = AF_INET; + if (!inet_pton(family, IP4_ADDR, &daddr4.sin_addr.s_addr)) + error(1, errno, "inet_pton failed: %s", IP4_ADDR); + daddr4.sin_port = htons(PORT); + + sz = sizeof(saddr4); + saddr = (struct sockaddr *)&saddr4; + daddr = (struct sockaddr *)&daddr4; + break; + case AF_INET6: + saddr6.sin6_family = AF_INET6; + saddr6.sin6_addr = in6addr_any; + + daddr6.sin6_family = AF_INET6; + if (!inet_pton(family, IP6_ADDR, &daddr6.sin6_addr)) + error(1, errno, "inet_pton failed: %s", IP6_ADDR); + daddr6.sin6_port = htons(PORT); + + sz = sizeof(saddr6); + saddr = (struct sockaddr *)&saddr6; + daddr = (struct sockaddr *)&daddr6; + break; + default: + error(1, 0, "Unsupported family %d", family); + /* clang does not recognize error() above as terminating + * the program, so it complains that saddr, daddr, sz are + * not initialized when this code path is taken. Silence it. + */ + return -1; + } + fd = socket(family, proto, 0); + if (fd < 0) + error(1, errno, "failed to create send socket"); + if (bind(fd, saddr, sz)) + error(1, errno, "failed to bind send socket"); + data[0] = 'a'; + ret = sendto(fd, data, 1, MSG_FASTOPEN, daddr, sz); + if (ret != 1) + error(1, errno, "failed to sendto"); + + return fd; +} + +static bool is_listen_fd(int fd) +{ + int i; + + for (i = 0; i < N_LISTEN; i++) { + if (rcv_fds[i] == fd) + return true; + } + return false; +} + +static int rotate_key(int fd) +{ + static int iter; + static uint32_t new_key[4]; + uint32_t keys[8]; + uint32_t tmp_key[4]; + int i; + int len = KEY_LENGTH * 2; + + if (iter < N_LISTEN) { + /* first set new key as backups */ + if (iter == 0) { + for (i = 0; i < ARRAY_SIZE(new_key); i++) + new_key[i] = rand(); + } + get_keys(fd, keys); + memcpy(keys + 4, new_key, KEY_LENGTH); + set_keys(fd, keys); + } else { + /* swap the keys */ + get_keys(fd, keys); + memcpy(tmp_key, keys + 4, KEY_LENGTH); + memcpy(keys + 4, keys, KEY_LENGTH); + memcpy(keys, tmp_key, KEY_LENGTH); + set_keys(fd, keys); + } + if (++iter >= (N_LISTEN * 2)) + iter = 0; +} + +static void run_one_test(int family) +{ + struct epoll_event ev; + int i, send_fd; + int n_loops = 10000; + int rotate_key_fd = 0; + int key_rotate_interval = 50; + int fd, epfd; + char buf[1]; + + build_rcv_fd(family, SOCK_STREAM, rcv_fds); + epfd = epoll_create(1); + if (epfd < 0) + error(1, errno, "failed to create epoll"); + ev.events = EPOLLIN; + for (i = 0; i < N_LISTEN; i++) { + ev.data.fd = rcv_fds[i]; + if (epoll_ctl(epfd, EPOLL_CTL_ADD, rcv_fds[i], &ev)) + error(1, errno, "failed to register sock epoll"); + } + while (n_loops--) { + send_fd = connect_and_send(family, SOCK_STREAM); + if (do_rotate && ((n_loops % key_rotate_interval) == 0)) { + rotate_key(rcv_fds[rotate_key_fd]); + if (++rotate_key_fd >= N_LISTEN) + rotate_key_fd = 0; + } + while (1) { + i = epoll_wait(epfd, &ev, 1, -1); + if (i < 0) + error(1, errno, "epoll_wait failed"); + if (is_listen_fd(ev.data.fd)) { + fd = accept(ev.data.fd, NULL, NULL); + if (fd < 0) + error(1, errno, "failed to accept"); + ev.data.fd = fd; + if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev)) + error(1, errno, "failed epoll add"); + continue; + } + i = recv(ev.data.fd, buf, sizeof(buf), 0); + if (i != 1) + error(1, errno, "failed recv data"); + if (epoll_ctl(epfd, EPOLL_CTL_DEL, ev.data.fd, NULL)) + error(1, errno, "failed epoll del"); + close(ev.data.fd); + break; + } + close(send_fd); + } + for (i = 0; i < N_LISTEN; i++) + close(rcv_fds[i]); +} + +static void parse_opts(int argc, char **argv) +{ + int c; + + while ((c = getopt(argc, argv, "46sr")) != -1) { + switch (c) { + case '4': + do_ipv6 = false; + break; + case '6': + do_ipv6 = true; + break; + case 's': + do_sockopt = true; + break; + case 'r': + do_rotate = true; + key_len = KEY_LENGTH * 2; + break; + default: + error(1, 0, "%s: parse error", argv[0]); + } + } +} + +int main(int argc, char **argv) +{ + parse_opts(argc, argv); + proc_fd = open(PROC_FASTOPEN_KEY, O_RDWR); + if (proc_fd < 0) + error(1, errno, "Unable to open %s", PROC_FASTOPEN_KEY); + srand(time(NULL)); + if (do_ipv6) + run_one_test(AF_INET6); + else + run_one_test(AF_INET); + close(proc_fd); + fprintf(stderr, "PASS\n"); + return 0; +} diff --git a/tools/testing/selftests/net/tcp_fastopen_backup_key.sh b/tools/testing/selftests/net/tcp_fastopen_backup_key.sh new file mode 100755 index 000000000000..41476399e184 --- /dev/null +++ b/tools/testing/selftests/net/tcp_fastopen_backup_key.sh @@ -0,0 +1,55 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# rotate TFO keys for ipv4/ipv6 and verify that the client does +# not present an invalid cookie. + +set +x +set -e + +readonly NETNS="ns-$(mktemp -u XXXXXX)" + +setup() { + ip netns add "${NETNS}" + ip -netns "${NETNS}" link set lo up + ip netns exec "${NETNS}" sysctl -w net.ipv4.tcp_fastopen=3 \ + >/dev/null 2>&1 +} + +cleanup() { + ip netns del "${NETNS}" +} + +trap cleanup EXIT +setup + +do_test() { + # flush routes before each run, otherwise successive runs can + # initially present an old TFO cookie + ip netns exec "${NETNS}" ip tcp_metrics flush + ip netns exec "${NETNS}" ./tcp_fastopen_backup_key "$1" + val=$(ip netns exec "${NETNS}" nstat -az | \ + grep TcpExtTCPFastOpenPassiveFail | awk '{print $2}') + if [ $val -ne 0 ]; then + echo "FAIL: TcpExtTCPFastOpenPassiveFail non-zero" + return 1 + fi +} + +do_test "-4" +do_test "-6" +do_test "-4" +do_test "-6" +do_test "-4s" +do_test "-6s" +do_test "-4s" +do_test "-6s" +do_test "-4r" +do_test "-6r" +do_test "-4r" +do_test "-6r" +do_test "-4sr" +do_test "-6sr" +do_test "-4sr" +do_test "-6sr" +echo "all tests done" |