diff options
Diffstat (limited to 'tools/testing/selftests/net/pmtu.sh')
-rwxr-xr-x | tools/testing/selftests/net/pmtu.sh | 931 |
1 files changed, 864 insertions, 67 deletions
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 77c09cd339c3..cfc84958025a 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 # # Check that route PMTU values match expectations, and that initial device MTU @@ -26,6 +26,15 @@ # - pmtu_ipv6 # Same as pmtu_ipv4, except for locked PMTU tests, using IPv6 # +# - pmtu_ipv4_dscp_icmp_exception +# Set up the same network topology as pmtu_ipv4, but use non-default +# routing table in A. A fib-rule is used to jump to this routing table +# based on DSCP. Send ICMPv4 packets with the expected DSCP value and +# verify that ECN doesn't interfere with the creation of PMTU exceptions. +# +# - pmtu_ipv4_dscp_udp_exception +# Same as pmtu_ipv4_dscp_icmp_exception, but use UDP instead of ICMP. +# # - pmtu_ipv4_vxlan4_exception # Set up the same network topology as pmtu_ipv4, create a VXLAN tunnel # over IPv4 between A and B, routed via R1. On the link between R1 and B, @@ -59,6 +68,45 @@ # Same as pmtu_ipv6_vxlan6_exception, but using a GENEVE tunnel instead of # VXLAN # +# - pmtu_ipv{4,6}_br_vxlan{4,6}_exception +# Set up three namespaces, A, B, and C, with routing between A and B over +# R1. R2 is unused in these tests. A has a veth connection to C, and is +# connected to B via a VXLAN endpoint, which is directly bridged to C. +# MTU on the B-R1 link is lower than other MTUs. +# +# Check that both C and A are able to communicate with B over the VXLAN +# tunnel, and that PMTU exceptions with the correct values are created. +# +# segment a_r1 segment b_r1 b_r1: 4000 +# .--------------R1--------------. everything +# C---veth A B else: 5000 +# ' bridge | +# '---- - - - - - VXLAN - - - - - - - ' +# +# - pmtu_ipv{4,6}_br_geneve{4,6}_exception +# Same as pmtu_ipv{4,6}_br_vxlan{4,6}_exception, with a GENEVE tunnel +# instead. +# +# - pmtu_ipv{4,6}_ovs_vxlan{4,6}_exception +# Set up two namespaces, B, and C, with routing between the init namespace +# and B over R1. A and R2 are unused in these tests. The init namespace +# has a veth connection to C, and is connected to B via a VXLAN endpoint, +# which is handled by Open vSwitch and bridged to C. MTU on the B-R1 link +# is lower than other MTUs. +# +# Check that C is able to communicate with B over the VXLAN tunnel, and +# that PMTU exceptions with the correct values are created. +# +# segment a_r1 segment b_r1 b_r1: 4000 +# .--------------R1--------------. everything +# C---veth init B else: 5000 +# '- ovs | +# '---- - - - - - VXLAN - - - - - - - ' +# +# - pmtu_ipv{4,6}_ovs_geneve{4,6}_exception +# Same as pmtu_ipv{4,6}_ovs_vxlan{4,6}_exception, with a GENEVE tunnel +# instead. +# # - pmtu_ipv{4,6}_fou{4,6}_exception # Same as pmtu_ipv4_vxlan4, but using a direct IPv4/IPv6 encapsulation # (FoU) over IPv4/IPv6, instead of VXLAN @@ -79,6 +127,16 @@ # below for IPv6 doesn't apply here, because, on IPv4, administrative MTU # changes alone won't affect PMTU # +# - pmtu_vti4_udp_exception +# Same as pmtu_vti4_exception, but using ESP-in-UDP +# +# - pmtu_vti4_udp_routed_exception +# Set up vti tunnel on top of veth connected through routing namespace and +# add xfrm states and policies with ESP-in-UDP encapsulation. Check that +# route exception is not created if link layer MTU is not exceeded, then +# lower MTU on second part of routed environment and check that exception +# is created with the expected PMTU. +# # - pmtu_vti6_exception # Set up vti6 tunnel on top of veth, with xfrm states and policies, in two # namespaces with matching endpoints. Check that route exception is @@ -86,6 +144,13 @@ # decrease and increase MTU of tunnel, checking that route exception PMTU # changes accordingly # +# - pmtu_vti6_udp_exception +# Same as pmtu_vti6_exception, but using ESP-in-UDP +# +# - pmtu_vti6_udp_routed_exception +# Same as pmtu_vti6_udp_routed_exception but with routing between vti +# endpoints +# # - pmtu_vti4_default_mtu # Set up vti4 tunnel on top of veth, in two namespaces with matching # endpoints. Check that MTU assigned to vti interface is the MTU of the @@ -123,10 +188,18 @@ # - list_flush_ipv6_exception # Using the same topology as in pmtu_ipv6, create exceptions, and check # they are shown when listing exception caches, gone after flushing them +# +# - pmtu_ipv4_route_change +# Use the same topology as in pmtu_ipv4, but issue a route replacement +# command and delete the corresponding device afterward. This tests for +# proper cleanup of the PMTU exceptions by the route replacement path. +# Device unregistration should complete successfully +# +# - pmtu_ipv6_route_change +# Same as above but with IPv6 - -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 +source lib.sh +source net_helper.sh PAUSE_ON_FAIL=no VERBOSE=0 @@ -139,6 +212,8 @@ which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping) tests=" pmtu_ipv4_exception ipv4: PMTU exceptions 1 pmtu_ipv6_exception ipv6: PMTU exceptions 1 + pmtu_ipv4_dscp_icmp_exception ICMPv4 with DSCP and ECN: PMTU exceptions 1 + pmtu_ipv4_dscp_udp_exception UDPv4 with DSCP and ECN: PMTU exceptions 1 pmtu_ipv4_vxlan4_exception IPv4 over vxlan4: PMTU exceptions 1 pmtu_ipv6_vxlan4_exception IPv6 over vxlan4: PMTU exceptions 1 pmtu_ipv4_vxlan6_exception IPv4 over vxlan6: PMTU exceptions 1 @@ -147,6 +222,22 @@ tests=" pmtu_ipv6_geneve4_exception IPv6 over geneve4: PMTU exceptions 1 pmtu_ipv4_geneve6_exception IPv4 over geneve6: PMTU exceptions 1 pmtu_ipv6_geneve6_exception IPv6 over geneve6: PMTU exceptions 1 + pmtu_ipv4_br_vxlan4_exception IPv4, bridged vxlan4: PMTU exceptions 1 + pmtu_ipv6_br_vxlan4_exception IPv6, bridged vxlan4: PMTU exceptions 1 + pmtu_ipv4_br_vxlan6_exception IPv4, bridged vxlan6: PMTU exceptions 1 + pmtu_ipv6_br_vxlan6_exception IPv6, bridged vxlan6: PMTU exceptions 1 + pmtu_ipv4_br_geneve4_exception IPv4, bridged geneve4: PMTU exceptions 1 + pmtu_ipv6_br_geneve4_exception IPv6, bridged geneve4: PMTU exceptions 1 + pmtu_ipv4_br_geneve6_exception IPv4, bridged geneve6: PMTU exceptions 1 + pmtu_ipv6_br_geneve6_exception IPv6, bridged geneve6: PMTU exceptions 1 + pmtu_ipv4_ovs_vxlan4_exception IPv4, OVS vxlan4: PMTU exceptions 1 + pmtu_ipv6_ovs_vxlan4_exception IPv6, OVS vxlan4: PMTU exceptions 1 + pmtu_ipv4_ovs_vxlan6_exception IPv4, OVS vxlan6: PMTU exceptions 1 + pmtu_ipv6_ovs_vxlan6_exception IPv6, OVS vxlan6: PMTU exceptions 1 + pmtu_ipv4_ovs_geneve4_exception IPv4, OVS geneve4: PMTU exceptions 1 + pmtu_ipv6_ovs_geneve4_exception IPv6, OVS geneve4: PMTU exceptions 1 + pmtu_ipv4_ovs_geneve6_exception IPv4, OVS geneve6: PMTU exceptions 1 + pmtu_ipv6_ovs_geneve6_exception IPv6, OVS geneve6: PMTU exceptions 1 pmtu_ipv4_fou4_exception IPv4 over fou4: PMTU exceptions 1 pmtu_ipv6_fou4_exception IPv6 over fou4: PMTU exceptions 1 pmtu_ipv4_fou6_exception IPv4 over fou6: PMTU exceptions 1 @@ -161,6 +252,10 @@ tests=" pmtu_ipv6_ipv6_exception IPv6 over IPv6: PMTU exceptions 1 pmtu_vti6_exception vti6: PMTU exceptions 0 pmtu_vti4_exception vti4: PMTU exceptions 0 + pmtu_vti6_udp_exception vti6: PMTU exceptions (ESP-in-UDP) 0 + pmtu_vti4_udp_exception vti4: PMTU exceptions (ESP-in-UDP) 0 + pmtu_vti6_udp_routed_exception vti6: PMTU exceptions, routed (ESP-in-UDP) 0 + pmtu_vti4_udp_routed_exception vti4: PMTU exceptions, routed (ESP-in-UDP) 0 pmtu_vti4_default_mtu vti4: default MTU assignment 0 pmtu_vti6_default_mtu vti6: default MTU assignment 0 pmtu_vti4_link_add_mtu vti4: MTU setting on link creation 0 @@ -169,16 +264,9 @@ tests=" cleanup_ipv4_exception ipv4: cleanup of cached exceptions 1 cleanup_ipv6_exception ipv6: cleanup of cached exceptions 1 list_flush_ipv4_exception ipv4: list and flush cached exceptions 1 - list_flush_ipv6_exception ipv6: list and flush cached exceptions 1" - -NS_A="ns-A" -NS_B="ns-B" -NS_R1="ns-R1" -NS_R2="ns-R2" -ns_a="ip netns exec ${NS_A}" -ns_b="ip netns exec ${NS_B}" -ns_r1="ip netns exec ${NS_R1}" -ns_r2="ip netns exec ${NS_R2}" + list_flush_ipv6_exception ipv6: list and flush cached exceptions 1 + pmtu_ipv4_route_change ipv4: PMTU exception w/route replace 1 + pmtu_ipv6_route_change ipv6: PMTU exception w/route replace 1" # Addressing and routing for tests with routers: four network segments, with # index SEGMENT between 1 and 4, a common prefix (PREFIX4 or PREFIX6) and an @@ -212,7 +300,6 @@ routes=" A ${prefix6}:${b_r2}::1 ${prefix6}:${a_r2}::2 B default ${prefix6}:${b_r1}::2 " - USE_NH="no" # ns family nh id destination gateway nexthops=" @@ -237,11 +324,16 @@ routes_nh=" B 6 default 61 " +policy_mark=0x04 +rt_table=main + veth4_a_addr="192.168.1.1" veth4_b_addr="192.168.1.2" +veth4_c_addr="192.168.2.10" veth4_mask="24" veth6_a_addr="fd00:1::a" veth6_b_addr="fd00:1::b" +veth6_c_addr="fd00:2::c" veth6_mask="64" tunnel4_a_addr="192.168.2.1" @@ -257,6 +349,9 @@ dummy6_mask="64" err_buf= tcpdump_pids= +nettest_pids= +socat_pids= +tmpoutfile= err() { err_buf="${err_buf}${1} @@ -285,6 +380,16 @@ run_cmd() { return $rc } +run_cmd_bg() { + cmd="$*" + + if [ "$VERBOSE" = "1" ]; then + printf " COMMAND: %s &\n" "${cmd}" + fi + + $cmd 2>&1 & +} + # Find the auto-generated name for this namespace nsname() { eval echo \$NS_$1 @@ -296,7 +401,7 @@ setup_fou_or_gue() { encap="${3}" if [ "${outer}" = "4" ]; then - modprobe fou || return 2 + modprobe fou || return $ksft_skip a_addr="${prefix4}.${a_r1}.1" b_addr="${prefix4}.${b_r1}.1" if [ "${inner}" = "4" ]; then @@ -307,7 +412,7 @@ setup_fou_or_gue() { ipproto="41" fi else - modprobe fou6 || return 2 + modprobe fou6 || return $ksft_skip a_addr="${prefix6}:${a_r1}::1" b_addr="${prefix6}:${b_r1}::1" if [ "${inner}" = "4" ]; then @@ -321,8 +426,8 @@ setup_fou_or_gue() { fi fi - run_cmd ${ns_a} ip fou add port 5555 ipproto ${ipproto} || return 2 - run_cmd ${ns_a} ip link add ${encap}_a type ${type} ${mode} local ${a_addr} remote ${b_addr} encap ${encap} encap-sport auto encap-dport 5556 || return 2 + run_cmd ${ns_a} ip fou add port 5555 ipproto ${ipproto} || return $ksft_skip + run_cmd ${ns_a} ip link add ${encap}_a type ${type} ${mode} local ${a_addr} remote ${b_addr} encap ${encap} encap-sport auto encap-dport 5556 || return $ksft_skip run_cmd ${ns_b} ip fou add port 5556 ipproto ${ipproto} run_cmd ${ns_b} ip link add ${encap}_b type ${type} ${mode} local ${b_addr} remote ${a_addr} encap ${encap} encap-sport auto encap-dport 5555 @@ -396,7 +501,7 @@ setup_ipvX_over_ipvY() { fi fi - run_cmd ${ns_a} ip link add ip_a type ${type} local ${a_addr} remote ${b_addr} mode ${mode} || return 2 + run_cmd ${ns_a} ip link add ip_a type ${type} local ${a_addr} remote ${b_addr} mode ${mode} || return $ksft_skip run_cmd ${ns_b} ip link add ip_b type ${type} local ${b_addr} remote ${a_addr} mode ${mode} run_cmd ${ns_a} ip link set ip_a up @@ -428,13 +533,17 @@ setup_ip6ip6() { } setup_namespaces() { - for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do - ip netns add ${n} || return 1 - + setup_ns NS_A NS_B NS_C NS_R1 NS_R2 + for n in ${NS_A} ${NS_B} ${NS_C} ${NS_R1} ${NS_R2}; do # Disable DAD, so that we don't have to wait to use the # configured IPv6 addresses ip netns exec ${n} sysctl -q net/ipv6/conf/default/accept_dad=0 done + ns_a="ip netns exec ${NS_A}" + ns_b="ip netns exec ${NS_B}" + ns_c="ip netns exec ${NS_C}" + ns_r1="ip netns exec ${NS_R1}" + ns_r2="ip netns exec ${NS_R2}" } setup_veth() { @@ -479,11 +588,20 @@ setup_vti6() { setup_vti 6 ${veth6_a_addr} ${veth6_b_addr} ${tunnel6_a_addr} ${tunnel6_b_addr} ${tunnel6_mask} } +setup_vti4routed() { + setup_vti 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 ${tunnel4_a_addr} ${tunnel4_b_addr} ${tunnel4_mask} +} + +setup_vti6routed() { + setup_vti 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 ${tunnel6_a_addr} ${tunnel6_b_addr} ${tunnel6_mask} +} + setup_vxlan_or_geneve() { type="${1}" a_addr="${2}" b_addr="${3}" opts="${4}" + br_if_a="${5}" if [ "${type}" = "vxlan" ]; then opts="${opts} ttl 64 dstport 4789" @@ -497,10 +615,16 @@ setup_vxlan_or_geneve() { run_cmd ${ns_a} ip link add ${type}_a type ${type} id 1 ${opts_a} remote ${b_addr} ${opts} || return 1 run_cmd ${ns_b} ip link add ${type}_b type ${type} id 1 ${opts_b} remote ${a_addr} ${opts} - run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${type}_a - run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b + if [ -n "${br_if_a}" ]; then + run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${br_if_a} + run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${br_if_a} + run_cmd ${ns_a} ip link set ${type}_a master ${br_if_a} + else + run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${type}_a + run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${type}_a + fi - run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${type}_a + run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${type}_b run_cmd ${ns_a} ip link set ${type}_a up @@ -516,29 +640,65 @@ setup_vxlan4() { } setup_geneve6() { - setup_vxlan_or_geneve geneve ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 + setup_vxlan_or_geneve geneve ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "" } setup_vxlan6() { - setup_vxlan_or_geneve vxlan ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 + setup_vxlan_or_geneve vxlan ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "" +} + +setup_bridged_geneve4() { + setup_vxlan_or_geneve geneve ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "df set" "br0" +} + +setup_bridged_vxlan4() { + setup_vxlan_or_geneve vxlan ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "df set" "br0" +} + +setup_bridged_geneve6() { + setup_vxlan_or_geneve geneve ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "" "br0" +} + +setup_bridged_vxlan6() { + setup_vxlan_or_geneve vxlan ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "" "br0" } setup_xfrm() { proto=${1} veth_a_addr="${2}" veth_b_addr="${3}" + encap=${4} - run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel || return 1 - run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel + run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap} || return 1 + run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap} run_cmd ${ns_a} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel run_cmd ${ns_a} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel - run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel - run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel + run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap} + run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap} run_cmd ${ns_b} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel run_cmd ${ns_b} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel } +setup_nettest_xfrm() { + if ! which nettest >/dev/null; then + PATH=$PWD:$PATH + if ! which nettest >/dev/null; then + echo "'nettest' command not found; skipping tests" + return 1 + fi + fi + + [ ${1} -eq 6 ] && proto="-6" || proto="" + port=${2} + + run_cmd_bg "${ns_a}" nettest "${proto}" -q -D -s -x -p "${port}" -t 5 + nettest_pids="${nettest_pids} $!" + + run_cmd_bg "${ns_b}" nettest "${proto}" -q -D -s -x -p "${port}" -t 5 + nettest_pids="${nettest_pids} $!" +} + setup_xfrm4() { setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr} } @@ -547,6 +707,26 @@ setup_xfrm6() { setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} } +setup_xfrm4udp() { + setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr} "encap espinudp 4500 4500 0.0.0.0" && \ + setup_nettest_xfrm 4 4500 +} + +setup_xfrm6udp() { + setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} "encap espinudp 4500 4500 0.0.0.0" && \ + setup_nettest_xfrm 6 4500 +} + +setup_xfrm4udprouted() { + setup_xfrm 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "encap espinudp 4500 4500 0.0.0.0" && \ + setup_nettest_xfrm 4 4500 +} + +setup_xfrm6udprouted() { + setup_xfrm 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "encap espinudp 4500 4500 0.0.0.0" && \ + setup_nettest_xfrm 6 4500 +} + setup_routing_old() { for i in ${routes}; do [ "${ns}" = "" ] && ns="${i}" && continue @@ -555,7 +735,7 @@ setup_routing_old() { ns_name="$(nsname ${ns})" - ip -n ${ns_name} route add ${addr} via ${gw} + ip -n "${ns_name}" route add "${addr}" table "${rt_table}" via "${gw}" ns=""; addr=""; gw="" done @@ -585,7 +765,7 @@ setup_routing_new() { ns_name="$(nsname ${ns})" - ip -n ${ns_name} -${fam} route add ${addr} nhid ${nhid} + ip -n "${ns_name}" -"${fam}" route add "${addr}" table "${rt_table}" nhid "${nhid}" ns=""; fam=""; addr=""; nhid="" done @@ -630,10 +810,101 @@ setup_routing() { return 0 } +setup_policy_routing() { + setup_routing + + ip -netns "${NS_A}" -4 rule add dsfield "${policy_mark}" \ + table "${rt_table}" + + # Set the IPv4 Don't Fragment bit with tc, since socat doesn't seem to + # have an option do to it. + tc -netns "${NS_A}" qdisc replace dev veth_A-R1 root prio + tc -netns "${NS_A}" qdisc replace dev veth_A-R2 root prio + tc -netns "${NS_A}" filter add dev veth_A-R1 \ + protocol ipv4 flower ip_proto udp \ + action pedit ex munge ip df set 0x40 pipe csum ip and udp + tc -netns "${NS_A}" filter add dev veth_A-R2 \ + protocol ipv4 flower ip_proto udp \ + action pedit ex munge ip df set 0x40 pipe csum ip and udp +} + +setup_bridge() { + run_cmd ${ns_a} ip link add br0 type bridge || return $ksft_skip + run_cmd ${ns_a} ip link set br0 up + + run_cmd ${ns_c} ip link add veth_C-A type veth peer name veth_A-C + run_cmd ${ns_c} ip link set veth_A-C netns ${NS_A} + + run_cmd ${ns_a} ip link set veth_A-C up + run_cmd ${ns_c} ip link set veth_C-A up + run_cmd ${ns_c} ip addr add ${veth4_c_addr}/${veth4_mask} dev veth_C-A + run_cmd ${ns_c} ip addr add ${veth6_c_addr}/${veth6_mask} dev veth_C-A + run_cmd ${ns_a} ip link set veth_A-C master br0 +} + +setup_ovs_vxlan_or_geneve() { + type="${1}" + a_addr="${2}" + b_addr="${3}" + + if [ "${type}" = "vxlan" ]; then + opts="${opts} ttl 64 dstport 4789" + opts_b="local ${b_addr}" + fi + + run_cmd ovs-vsctl add-port ovs_br0 ${type}_a -- \ + set interface ${type}_a type=${type} \ + options:remote_ip=${b_addr} options:key=1 options:csum=true || return 1 + + run_cmd ${ns_b} ip link add ${type}_b type ${type} id 1 ${opts_b} remote ${a_addr} ${opts} || return 1 + + run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b + run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${type}_b + + run_cmd ${ns_b} ip link set ${type}_b up +} + +setup_ovs_geneve4() { + setup_ovs_vxlan_or_geneve geneve ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 +} + +setup_ovs_vxlan4() { + setup_ovs_vxlan_or_geneve vxlan ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 +} + +setup_ovs_geneve6() { + setup_ovs_vxlan_or_geneve geneve ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 +} + +setup_ovs_vxlan6() { + setup_ovs_vxlan_or_geneve vxlan ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 +} + +setup_ovs_bridge() { + run_cmd ovs-vsctl add-br ovs_br0 || return $ksft_skip + run_cmd ip link set ovs_br0 up + + run_cmd ${ns_c} ip link add veth_C-A type veth peer name veth_A-C + run_cmd ${ns_c} ip link set veth_A-C netns 1 + + run_cmd ip link set veth_A-C up + run_cmd ${ns_c} ip link set veth_C-A up + run_cmd ${ns_c} ip addr add ${veth4_c_addr}/${veth4_mask} dev veth_C-A + run_cmd ${ns_c} ip addr add ${veth6_c_addr}/${veth6_mask} dev veth_C-A + run_cmd ovs-vsctl add-port ovs_br0 veth_A-C + + # Move veth_A-R1 to init + run_cmd ${ns_a} ip link set veth_A-R1 netns 1 + run_cmd ip addr add ${prefix4}.${a_r1}.1/${veth4_mask} dev veth_A-R1 + run_cmd ip addr add ${prefix6}:${a_r1}::1/${veth6_mask} dev veth_A-R1 + run_cmd ip link set veth_A-R1 up + run_cmd ip route add ${prefix4}.${b_r1}.1 via ${prefix4}.${a_r1}.2 + run_cmd ip route add ${prefix6}:${b_r1}::1 via ${prefix6}:${a_r1}::2 +} + setup() { [ "$(id -u)" -ne 0 ] && echo " need to run as root" && return $ksft_skip - cleanup for arg do eval setup_${arg} || { echo " ${arg} not supported"; return 1; } done @@ -644,7 +915,7 @@ trace() { for arg do [ "${ns_cmd}" = "" ] && ns_cmd="${arg}" && continue - ${ns_cmd} tcpdump -s 0 -i "${arg}" -w "${name}_${arg}.pcap" 2> /dev/null & + ${ns_cmd} tcpdump --immediate-mode -s 0 -i "${arg}" -w "${name}_${arg}.pcap" 2> /dev/null & tcpdump_pids="${tcpdump_pids} $!" ns_cmd= done @@ -657,9 +928,23 @@ cleanup() { done tcpdump_pids= - for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do - ip netns del ${n} 2> /dev/null + for pid in ${nettest_pids}; do + kill ${pid} done + nettest_pids= + + for pid in ${socat_pids}; do + kill "${pid}" + done + socat_pids= + + cleanup_all_ns + + ip link del veth_A-C 2>/dev/null + ip link del veth_A-R1 2>/dev/null + ovs-vsctl --if-exists del-port vxlan_a 2>/dev/null + ovs-vsctl --if-exists del-br ovs_br0 2>/dev/null + rm -f "$tmpoutfile" } mtu() { @@ -699,15 +984,21 @@ link_get_mtu() { route_get_dst_exception() { ns_cmd="${1}" dst="${2}" + dsfield="${3}" - ${ns_cmd} ip route get "${dst}" + if [ -z "${dsfield}" ]; then + dsfield=0 + fi + + ${ns_cmd} ip route get "${dst}" dsfield "${dsfield}" } route_get_dst_pmtu_from_exception() { ns_cmd="${1}" dst="${2}" + dsfield="${3}" - mtu_parse "$(route_get_dst_exception "${ns_cmd}" ${dst})" + mtu_parse "$(route_get_dst_exception "${ns_cmd}" "${dst}" "${dsfield}")" } check_pmtu_value() { @@ -726,7 +1017,7 @@ check_pmtu_value() { test_pmtu_ipvX() { family=${1} - setup namespaces routing || return 2 + setup namespaces routing || return $ksft_skip trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \ "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \ @@ -817,6 +1108,95 @@ test_pmtu_ipv6_exception() { test_pmtu_ipvX 6 } +test_pmtu_ipv4_dscp_icmp_exception() { + rt_table=100 + + setup namespaces policy_routing || return $ksft_skip + trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ + "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \ + "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \ + "${ns_r2}" veth_R2-B "${ns_b}" veth_B-R2 + + # Set up initial MTU values + mtu "${ns_a}" veth_A-R1 2000 + mtu "${ns_r1}" veth_R1-A 2000 + mtu "${ns_r1}" veth_R1-B 1400 + mtu "${ns_b}" veth_B-R1 1400 + + mtu "${ns_a}" veth_A-R2 2000 + mtu "${ns_r2}" veth_R2-A 2000 + mtu "${ns_r2}" veth_R2-B 1500 + mtu "${ns_b}" veth_B-R2 1500 + + len=$((2000 - 20 - 8)) # Fills MTU of veth_A-R1 + + dst1="${prefix4}.${b_r1}.1" + dst2="${prefix4}.${b_r2}.1" + + # Create route exceptions + dsfield=${policy_mark} # No ECN bit set (Not-ECT) + run_cmd "${ns_a}" ping -q -M want -Q "${dsfield}" -c 1 -w 1 -s "${len}" "${dst1}" + + dsfield=$(printf "%#x" $((policy_mark + 0x02))) # ECN=2 (ECT(0)) + run_cmd "${ns_a}" ping -q -M want -Q "${dsfield}" -c 1 -w 1 -s "${len}" "${dst2}" + + # Check that exceptions have been created with the correct PMTU + pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" "${policy_mark}")" + check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1 + + pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" "${policy_mark}")" + check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1 +} + +test_pmtu_ipv4_dscp_udp_exception() { + rt_table=100 + + if ! which socat > /dev/null 2>&1; then + echo "'socat' command not found; skipping tests" + return $ksft_skip + fi + + setup namespaces policy_routing || return $ksft_skip + trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ + "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \ + "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \ + "${ns_r2}" veth_R2-B "${ns_b}" veth_B-R2 + + # Set up initial MTU values + mtu "${ns_a}" veth_A-R1 2000 + mtu "${ns_r1}" veth_R1-A 2000 + mtu "${ns_r1}" veth_R1-B 1400 + mtu "${ns_b}" veth_B-R1 1400 + + mtu "${ns_a}" veth_A-R2 2000 + mtu "${ns_r2}" veth_R2-A 2000 + mtu "${ns_r2}" veth_R2-B 1500 + mtu "${ns_b}" veth_B-R2 1500 + + len=$((2000 - 20 - 8)) # Fills MTU of veth_A-R1 + + dst1="${prefix4}.${b_r1}.1" + dst2="${prefix4}.${b_r2}.1" + + # Create route exceptions + run_cmd_bg "${ns_b}" socat UDP-LISTEN:50000 OPEN:/dev/null,wronly=1 + socat_pids="${socat_pids} $!" + + dsfield=${policy_mark} # No ECN bit set (Not-ECT) + run_cmd "${ns_a}" socat OPEN:/dev/zero,rdonly=1,readbytes="${len}" \ + UDP:"${dst1}":50000,tos="${dsfield}" + + dsfield=$(printf "%#x" $((policy_mark + 0x02))) # ECN=2 (ECT(0)) + run_cmd "${ns_a}" socat OPEN:/dev/zero,rdonly=1,readbytes="${len}" \ + UDP:"${dst2}":50000,tos="${dsfield}" + + # Check that exceptions have been created with the correct PMTU + pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" "${policy_mark}")" + check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1 + pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" "${policy_mark}")" + check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1 +} + test_pmtu_ipvX_over_vxlanY_or_geneveY_exception() { type=${1} family=${2} @@ -824,11 +1204,11 @@ test_pmtu_ipvX_over_vxlanY_or_geneveY_exception() { ll_mtu=4000 if [ ${outer_family} -eq 4 ]; then - setup namespaces routing ${type}4 || return 2 + setup namespaces routing ${type}4 || return $ksft_skip # IPv4 header UDP header VXLAN/GENEVE header Ethernet header exp_mtu=$((${ll_mtu} - 20 - 8 - 8 - 14)) else - setup namespaces routing ${type}6 || return 2 + setup namespaces routing ${type}6 || return $ksft_skip # IPv6 header UDP header VXLAN/GENEVE header Ethernet header exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - 14)) fi @@ -892,13 +1272,219 @@ test_pmtu_ipv6_geneve6_exception() { test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 6 6 } +test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() { + type=${1} + family=${2} + outer_family=${3} + ll_mtu=4000 + + if [ ${outer_family} -eq 4 ]; then + setup namespaces routing bridge bridged_${type}4 || return $ksft_skip + # IPv4 header UDP header VXLAN/GENEVE header Ethernet header + exp_mtu=$((${ll_mtu} - 20 - 8 - 8 - 14)) + else + setup namespaces routing bridge bridged_${type}6 || return $ksft_skip + # IPv6 header UDP header VXLAN/GENEVE header Ethernet header + exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - 14)) + fi + + trace "${ns_a}" ${type}_a "${ns_b}" ${type}_b \ + "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ + "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B \ + "${ns_a}" br0 "${ns_a}" veth-A-C \ + "${ns_c}" veth_C-A + + if [ ${family} -eq 4 ]; then + ping=ping + dst=${tunnel4_b_addr} + else + ping=${ping6} + dst=${tunnel6_b_addr} + fi + + # Create route exception by exceeding link layer MTU + mtu "${ns_a}" veth_A-R1 $((${ll_mtu} + 1000)) + mtu "${ns_a}" br0 $((${ll_mtu} + 1000)) + mtu "${ns_a}" veth_A-C $((${ll_mtu} + 1000)) + mtu "${ns_c}" veth_C-A $((${ll_mtu} + 1000)) + mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000)) + mtu "${ns_b}" veth_B-R1 ${ll_mtu} + mtu "${ns_r1}" veth_R1-B ${ll_mtu} + + mtu "${ns_a}" ${type}_a $((${ll_mtu} + 1000)) + mtu "${ns_b}" ${type}_b $((${ll_mtu} + 1000)) + + run_cmd ${ns_c} ${ping} -q -M want -i 0.1 -c 10 -s $((${ll_mtu} + 500)) ${dst} || return 1 + run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst} || return 1 + + # Check that exceptions were created + pmtu="$(route_get_dst_pmtu_from_exception "${ns_c}" ${dst})" + check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on bridged ${type} interface" + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})" + check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on locally bridged ${type} interface" + + tmpoutfile=$(mktemp) + + # Flush Exceptions, retry with TCP + run_cmd ${ns_a} ip route flush cached ${dst} + run_cmd ${ns_b} ip route flush cached ${dst} + run_cmd ${ns_c} ip route flush cached ${dst} + + for target in "${ns_a}" "${ns_c}" ; do + if [ ${family} -eq 4 ]; then + TCPDST=TCP:${dst}:50000 + else + TCPDST="TCP:[${dst}]:50000" + fi + ${ns_b} socat -T 3 -u -6 TCP-LISTEN:50000,reuseaddr STDOUT > $tmpoutfile & + local socat_pid=$! + + wait_local_port_listen ${NS_B} 50000 tcp + + dd if=/dev/zero status=none bs=1M count=1 | ${target} socat -T 3 -u STDIN $TCPDST,connect-timeout=3 + + wait ${socat_pid} + size=$(du -sb $tmpoutfile) + size=${size%%/tmp/*} + + [ $size -ne 1048576 ] && err "File size $size mismatches exepcted value in locally bridged vxlan test" && return 1 + done + + rm -f "$tmpoutfile" + + # Check that exceptions were created + pmtu="$(route_get_dst_pmtu_from_exception "${ns_c}" ${dst})" + check_pmtu_value ${exp_mtu} "${pmtu}" "tcp: exceeding link layer MTU on bridged ${type} interface" + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})" + check_pmtu_value ${exp_mtu} "${pmtu}" "tcp exceeding link layer MTU on locally bridged ${type} interface" +} + +test_pmtu_ipv4_br_vxlan4_exception() { + test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception vxlan 4 4 +} + +test_pmtu_ipv6_br_vxlan4_exception() { + test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception vxlan 6 4 +} + +test_pmtu_ipv4_br_geneve4_exception() { + test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception geneve 4 4 +} + +test_pmtu_ipv6_br_geneve4_exception() { + test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception geneve 6 4 +} + +test_pmtu_ipv4_br_vxlan6_exception() { + test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception vxlan 4 6 +} + +test_pmtu_ipv6_br_vxlan6_exception() { + test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception vxlan 6 6 +} + +test_pmtu_ipv4_br_geneve6_exception() { + test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception geneve 4 6 +} + +test_pmtu_ipv6_br_geneve6_exception() { + test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception geneve 6 6 +} + +test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception() { + type=${1} + family=${2} + outer_family=${3} + ll_mtu=4000 + + if [ ${outer_family} -eq 4 ]; then + setup namespaces routing ovs_bridge ovs_${type}4 || return $ksft_skip + # IPv4 header UDP header VXLAN/GENEVE header Ethernet header + exp_mtu=$((${ll_mtu} - 20 - 8 - 8 - 14)) + else + setup namespaces routing ovs_bridge ovs_${type}6 || return $ksft_skip + # IPv6 header UDP header VXLAN/GENEVE header Ethernet header + exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - 14)) + fi + + if [ "${type}" = "vxlan" ]; then + tun_a="vxlan_sys_4789" + elif [ "${type}" = "geneve" ]; then + tun_a="genev_sys_6081" + fi + + trace "" "${tun_a}" "${ns_b}" ${type}_b \ + "" veth_A-R1 "${ns_r1}" veth_R1-A \ + "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B \ + "" ovs_br0 "" veth-A-C \ + "${ns_c}" veth_C-A + + if [ ${family} -eq 4 ]; then + ping=ping + dst=${tunnel4_b_addr} + else + ping=${ping6} + dst=${tunnel6_b_addr} + fi + + # Create route exception by exceeding link layer MTU + mtu "" veth_A-R1 $((${ll_mtu} + 1000)) + mtu "" ovs_br0 $((${ll_mtu} + 1000)) + mtu "" veth_A-C $((${ll_mtu} + 1000)) + mtu "${ns_c}" veth_C-A $((${ll_mtu} + 1000)) + mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000)) + mtu "${ns_b}" veth_B-R1 ${ll_mtu} + mtu "${ns_r1}" veth_R1-B ${ll_mtu} + + mtu "" ${tun_a} $((${ll_mtu} + 1000)) + mtu "${ns_b}" ${type}_b $((${ll_mtu} + 1000)) + + run_cmd ${ns_c} ${ping} -q -M want -i 0.1 -c 20 -s $((${ll_mtu} + 500)) ${dst} || return 1 + + # Check that exceptions were created + pmtu="$(route_get_dst_pmtu_from_exception "${ns_c}" ${dst})" + check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on Open vSwitch ${type} interface" +} + +test_pmtu_ipv4_ovs_vxlan4_exception() { + test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception vxlan 4 4 +} + +test_pmtu_ipv6_ovs_vxlan4_exception() { + test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception vxlan 6 4 +} + +test_pmtu_ipv4_ovs_geneve4_exception() { + test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception geneve 4 4 +} + +test_pmtu_ipv6_ovs_geneve4_exception() { + test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception geneve 6 4 +} + +test_pmtu_ipv4_ovs_vxlan6_exception() { + test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception vxlan 4 6 +} + +test_pmtu_ipv6_ovs_vxlan6_exception() { + test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception vxlan 6 6 +} + +test_pmtu_ipv4_ovs_geneve6_exception() { + test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception geneve 4 6 +} + +test_pmtu_ipv6_ovs_geneve6_exception() { + test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception geneve 6 6 +} + test_pmtu_ipvX_over_fouY_or_gueY() { inner_family=${1} outer_family=${2} encap=${3} ll_mtu=4000 - setup namespaces routing ${encap}${outer_family}${inner_family} || return 2 + setup namespaces routing ${encap}${outer_family}${inner_family} || return $ksft_skip trace "${ns_a}" ${encap}_a "${ns_b}" ${encap}_b \ "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B @@ -977,7 +1563,7 @@ test_pmtu_ipvX_over_ipvY_exception() { outer=${2} ll_mtu=4000 - setup namespaces routing ip${inner}ip${outer} || return 2 + setup namespaces routing ip${inner}ip${outer} || return $ksft_skip trace "${ns_a}" ip_a "${ns_b}" ip_b \ "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ @@ -1031,7 +1617,7 @@ test_pmtu_ipv6_ipv6_exception() { } test_pmtu_vti4_exception() { - setup namespaces veth vti4 xfrm4 || return 2 + setup namespaces veth vti4 xfrm4 || return $ksft_skip trace "${ns_a}" veth_a "${ns_b}" veth_b \ "${ns_a}" vti4_a "${ns_b}" vti4_b @@ -1061,7 +1647,67 @@ test_pmtu_vti4_exception() { } test_pmtu_vti6_exception() { - setup namespaces veth vti6 xfrm6 || return 2 + setup namespaces veth vti6 xfrm6 || return $ksft_skip + trace "${ns_a}" veth_a "${ns_b}" veth_b \ + "${ns_a}" vti6_a "${ns_b}" vti6_b + fail=0 + + # Create route exception by exceeding link layer MTU + mtu "${ns_a}" veth_a 4000 + mtu "${ns_b}" veth_b 4000 + mtu "${ns_a}" vti6_a 5000 + mtu "${ns_b}" vti6_b 5000 + run_cmd ${ns_a} ${ping6} -q -i 0.1 -w 1 -s 60000 ${tunnel6_b_addr} + + # Check that exception was created + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" + check_pmtu_value any "${pmtu}" "creating tunnel exceeding link layer MTU" || return 1 + + # Decrease tunnel MTU, check for PMTU decrease in route exception + mtu "${ns_a}" vti6_a 3000 + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" + check_pmtu_value "3000" "${pmtu}" "decreasing tunnel MTU" || fail=1 + + # Increase tunnel MTU, check for PMTU increase in route exception + mtu "${ns_a}" vti6_a 9000 + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" + check_pmtu_value "9000" "${pmtu}" "increasing tunnel MTU" || fail=1 + + return ${fail} +} + +test_pmtu_vti4_udp_exception() { + setup namespaces veth vti4 xfrm4udp || return $ksft_skip + trace "${ns_a}" veth_a "${ns_b}" veth_b \ + "${ns_a}" vti4_a "${ns_b}" vti4_b + + veth_mtu=1500 + vti_mtu=$((veth_mtu - 20)) + + # UDP SPI SN IV ICV pad length next header + esp_payload_rfc4106=$((vti_mtu - 8 - 4 - 4 - 8 - 16 - 1 - 1)) + ping_payload=$((esp_payload_rfc4106 - 28)) + + mtu "${ns_a}" veth_a ${veth_mtu} + mtu "${ns_b}" veth_b ${veth_mtu} + mtu "${ns_a}" vti4_a ${vti_mtu} + mtu "${ns_b}" vti4_b ${vti_mtu} + + # Send DF packet without exceeding link layer MTU, check that no + # exception is created + run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr} + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})" + check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1 + + # Now exceed link layer MTU by one byte, check that exception is created + # with the right PMTU value + run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload + 1)) ${tunnel4_b_addr} + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})" + check_pmtu_value "${esp_payload_rfc4106}" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106 + 1)))" +} + +test_pmtu_vti6_udp_exception() { + setup namespaces veth vti6 xfrm6udp || return $ksft_skip trace "${ns_a}" veth_a "${ns_b}" veth_b \ "${ns_a}" vti6_a "${ns_b}" vti6_b fail=0 @@ -1090,8 +1736,77 @@ test_pmtu_vti6_exception() { return ${fail} } +test_pmtu_vti4_udp_routed_exception() { + setup namespaces routing vti4routed xfrm4udprouted || return $ksft_skip + trace "${ns_a}" veth_A-R1 "${ns_b}" veth_B-R1 \ + "${ns_a}" vti4_a "${ns_b}" vti4_b + + veth_mtu=1500 + vti_mtu=$((veth_mtu - 20)) + + # UDP SPI SN IV ICV pad length next header + esp_payload_rfc4106=$((vti_mtu - 8 - 4 - 4 - 8 - 16 - 1 - 1)) + ping_payload=$((esp_payload_rfc4106 - 28)) + + mtu "${ns_a}" veth_A-R1 ${veth_mtu} + mtu "${ns_r1}" veth_R1-A ${veth_mtu} + mtu "${ns_b}" veth_B-R1 ${veth_mtu} + mtu "${ns_r1}" veth_R1-B ${veth_mtu} + + mtu "${ns_a}" vti4_a ${vti_mtu} + mtu "${ns_b}" vti4_b ${vti_mtu} + + # Send DF packet without exceeding link layer MTU, check that no + # exception is created + run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr} + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})" + check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1 + + # Now decrease link layer MTU by 8 bytes on R1, check that exception is created + # with the right PMTU value + mtu "${ns_r1}" veth_R1-B $((veth_mtu - 8)) + run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload)) ${tunnel4_b_addr} + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})" + check_pmtu_value "$((esp_payload_rfc4106 - 8))" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106)))" +} + +test_pmtu_vti6_udp_routed_exception() { + setup namespaces routing vti6routed xfrm6udprouted || return $ksft_skip + trace "${ns_a}" veth_A-R1 "${ns_b}" veth_B-R1 \ + "${ns_a}" vti6_a "${ns_b}" vti6_b + + veth_mtu=1500 + vti_mtu=$((veth_mtu - 40)) + + # UDP SPI SN IV ICV pad length next header + esp_payload_rfc4106=$((vti_mtu - 8 - 4 - 4 - 8 - 16 - 1 - 1)) + ping_payload=$((esp_payload_rfc4106 - 48)) + + mtu "${ns_a}" veth_A-R1 ${veth_mtu} + mtu "${ns_r1}" veth_R1-A ${veth_mtu} + mtu "${ns_b}" veth_B-R1 ${veth_mtu} + mtu "${ns_r1}" veth_R1-B ${veth_mtu} + + # mtu "${ns_a}" vti6_a ${vti_mtu} + # mtu "${ns_b}" vti6_b ${vti_mtu} + + run_cmd ${ns_a} ${ping6} -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel6_b_addr} + + # Check that exception was not created + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" + check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1 + + # Now decrease link layer MTU by 8 bytes on R1, check that exception is created + # with the right PMTU value + mtu "${ns_r1}" veth_R1-B $((veth_mtu - 8)) + run_cmd ${ns_a} ${ping6} -q -M want -i 0.1 -w 1 -s $((ping_payload)) ${tunnel6_b_addr} + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" + check_pmtu_value "$((esp_payload_rfc4106 - 8))" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106)))" + +} + test_pmtu_vti4_default_mtu() { - setup namespaces veth vti4 || return 2 + setup namespaces veth vti4 || return $ksft_skip # Check that MTU of vti device is MTU of veth minus IPv4 header length veth_mtu="$(link_get_mtu "${ns_a}" veth_a)" @@ -1103,7 +1818,7 @@ test_pmtu_vti4_default_mtu() { } test_pmtu_vti6_default_mtu() { - setup namespaces veth vti6 || return 2 + setup namespaces veth vti6 || return $ksft_skip # Check that MTU of vti device is MTU of veth minus IPv6 header length veth_mtu="$(link_get_mtu "${ns_a}" veth_a)" @@ -1115,10 +1830,10 @@ test_pmtu_vti6_default_mtu() { } test_pmtu_vti4_link_add_mtu() { - setup namespaces || return 2 + setup namespaces || return $ksft_skip run_cmd ${ns_a} ip link add vti4_a type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10 - [ $? -ne 0 ] && err " vti not supported" && return 2 + [ $? -ne 0 ] && err " vti not supported" && return $ksft_skip run_cmd ${ns_a} ip link del vti4_a fail=0 @@ -1153,10 +1868,10 @@ test_pmtu_vti4_link_add_mtu() { } test_pmtu_vti6_link_add_mtu() { - setup namespaces || return 2 + setup namespaces || return $ksft_skip run_cmd ${ns_a} ip link add vti6_a type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10 - [ $? -ne 0 ] && err " vti6 not supported" && return 2 + [ $? -ne 0 ] && err " vti6 not supported" && return $ksft_skip run_cmd ${ns_a} ip link del vti6_a fail=0 @@ -1191,10 +1906,10 @@ test_pmtu_vti6_link_add_mtu() { } test_pmtu_vti6_link_change_mtu() { - setup namespaces || return 2 + setup namespaces || return $ksft_skip run_cmd ${ns_a} ip link add dummy0 mtu 1500 type dummy - [ $? -ne 0 ] && err " dummy not supported" && return 2 + [ $? -ne 0 ] && err " dummy not supported" && return $ksft_skip run_cmd ${ns_a} ip link add dummy1 mtu 3000 type dummy run_cmd ${ns_a} ip link set dummy0 up run_cmd ${ns_a} ip link set dummy1 up @@ -1242,15 +1957,22 @@ check_command() { return 0 } +check_running() { + pid=${1} + cmd=${2} + + [ "$(cat /proc/${pid}/cmdline 2>/dev/null | tr -d '\0')" = "{cmd}" ] +} + test_cleanup_vxlanX_exception() { outer="${1}" encap="vxlan" ll_mtu=4000 - check_command taskset || return 2 + check_command taskset || return $ksft_skip cpu_list=$(grep -m 2 processor /proc/cpuinfo | cut -d ' ' -f 2) - setup namespaces routing ${encap}${outer} || return 2 + setup namespaces routing ${encap}${outer} || return $ksft_skip trace "${ns_a}" ${encap}_a "${ns_b}" ${encap}_b \ "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B @@ -1272,11 +1994,12 @@ test_cleanup_vxlanX_exception() { ${ns_a} ip link del dev veth_A-R1 & iplink_pid=$! - sleep 1 - if [ "$(cat /proc/${iplink_pid}/cmdline 2>/dev/null | tr -d '\0')" = "iplinkdeldevveth_A-R1" ]; then - err " can't delete veth device in a timely manner, PMTU dst likely leaked" - return 1 - fi + for i in $(seq 1 20); do + check_running ${iplink_pid} "iplinkdeldevveth_A-R1" || return 0 + sleep 0.1 + done + err " can't delete veth device in a timely manner, PMTU dst likely leaked" + return 1 } test_cleanup_ipv6_exception() { @@ -1294,6 +2017,10 @@ run_test() { unset IFS + # Since cleanup() relies on variables modified by this subshell, it + # has to run in this context. + trap cleanup EXIT + if [ "$VERBOSE" = "1" ]; then printf "\n##########################################################################\n\n" fi @@ -1312,7 +2039,7 @@ run_test() { fi err_flush exit 1 - elif [ $ret -eq 2 ]; then + elif [ $ret -eq $ksft_skip ]; then printf "TEST: %-60s [SKIP]\n" "${tdesc}" err_flush fi @@ -1320,7 +2047,19 @@ run_test() { return $ret ) ret=$? - [ $ret -ne 0 ] && exitcode=1 + case $ret in + 0) + all_skipped=false + [ $exitcode -eq $ksft_skip ] && exitcode=0 + ;; + $ksft_skip) + [ $all_skipped = true ] && exitcode=$ksft_skip + ;; + *) + all_skipped=false + exitcode=1 + ;; + esac return $ret } @@ -1335,7 +2074,7 @@ run_test_nh() { } test_list_flush_ipv4_exception() { - setup namespaces routing || return 2 + setup namespaces routing || return $ksft_skip trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \ "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \ @@ -1389,7 +2128,7 @@ test_list_flush_ipv4_exception() { } test_list_flush_ipv6_exception() { - setup namespaces routing || return 2 + setup namespaces routing || return $ksft_skip trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \ "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \ @@ -1438,6 +2177,63 @@ test_list_flush_ipv6_exception() { return ${fail} } +test_pmtu_ipvX_route_change() { + family=${1} + + setup namespaces routing || return 2 + trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ + "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \ + "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \ + "${ns_r2}" veth_R2-B "${ns_b}" veth_B-R2 + + if [ ${family} -eq 4 ]; then + ping=ping + dst1="${prefix4}.${b_r1}.1" + dst2="${prefix4}.${b_r2}.1" + gw="${prefix4}.${a_r1}.2" + else + ping=${ping6} + dst1="${prefix6}:${b_r1}::1" + dst2="${prefix6}:${b_r2}::1" + gw="${prefix6}:${a_r1}::2" + fi + + # Set up initial MTU values + mtu "${ns_a}" veth_A-R1 2000 + mtu "${ns_r1}" veth_R1-A 2000 + mtu "${ns_r1}" veth_R1-B 1400 + mtu "${ns_b}" veth_B-R1 1400 + + mtu "${ns_a}" veth_A-R2 2000 + mtu "${ns_r2}" veth_R2-A 2000 + mtu "${ns_r2}" veth_R2-B 1500 + mtu "${ns_b}" veth_B-R2 1500 + + # Create route exceptions + run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst1} + run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst2} + + # Check that exceptions have been created with the correct PMTU + pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})" + check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1 + pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})" + check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1 + + # Replace the route from A to R1 + run_cmd ${ns_a} ip route change default via ${gw} + + # Delete the device in A + run_cmd ${ns_a} ip link del "veth_A-R1" +} + +test_pmtu_ipv4_route_change() { + test_pmtu_ipvX_route_change 4 +} + +test_pmtu_ipv6_route_change() { + test_pmtu_ipvX_route_change 6 +} + usage() { echo echo "$0 [OPTIONS] [TEST]..." @@ -1454,6 +2250,7 @@ usage() { # exitcode=0 desc=0 +all_skipped=true while getopts :ptv o do @@ -1508,7 +2305,7 @@ for t in ${tests}; do if [ $run_this -eq 1 ]; then run_test "${name}" "${desc}" # if test was skipped no need to retry with nexthop objects - [ $? -eq 2 ] && rerun_nh=0 + [ $? -eq $ksft_skip ] && rerun_nh=0 if [ "${rerun_nh}" = "1" ]; then run_test_nh "${name}" "${desc}" |