aboutsummaryrefslogtreecommitdiffstats
path: root/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh
blob: eb9553e4986b01861709c0b4c533885213c30f6a (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0

# Kselftest framework requirement - SKIP code is 4.
# Used as the exit status whenever a prerequisite check or the tunnel setup
# fails, so that the run counts as skipped instead of failed.
ksft_skip=4

# Conntrack needs to reassemble fragments in order to have complete
# packets for rule matching.  Reassembly can lead to packet loss.

# Consider the following setup:
#            +--------+       +---------+       +--------+
#            |Router A|-------|Wanrouter|-------|Router B|
#            |        |.IPIP..|         |..IPIP.|        |
#            +--------+       +---------+       +--------+
#           /                  mtu 1400                   \
#          /                                               \
#+--------+                                                 +--------+
#|Client A|                                                 |Client B|
#|        |                                                 |        |
#+--------+                                                 +--------+

# Router A and Router B use IPIP tunnel interfaces to tunnel traffic
# between Client A and Client B over WAN. Wanrouter has MTU 1400 set
# on its interfaces.

# Scratch file that receives the output of the UDP listener in test_path.
rx=$(mktemp)

# Random 8-character suffix; -u only prints a name, nothing is created.
# It keeps the namespace names of this run distinct.
rnd=$(mktemp -u XXXXXXXX)

# Namespaces: Client A/B, Router A/B and the WAN router in between.
c_a="ns-ca-${rnd}"
c_b="ns-cb-${rnd}"
r_a="ns-ra-${rnd}"
r_b="ns-rb-${rnd}"
r_w="ns-rw-${rnd}"

# checktool CMD REASON
# Run CMD with all of its output discarded.  If it fails, print
# "SKIP: Could not REASON" and leave the script with status $ksft_skip.
checktool () {
	# $1 is deliberately unquoted: it holds a command plus its arguments
	# and relies on word splitting.
	$1 > /dev/null 2>&1 && return

	echo "SKIP: Could not $2"
	exit $ksft_skip
}

# Prerequisites: skip instead of failing when a required tool is missing or
# a network namespace cannot be created.
checktool "iptables --version" "run test without iptables"
checktool "ip -Version" "run test without ip tool"
# 'command -v' is a POSIX shell builtin.  'which' is an optional external
# program; if it were absent the check would fail and be misreported as a
# missing socat.
checktool "command -v socat" "run test without socat"
# This probe creates Router A's namespace as a side effect.
checktool "ip netns add ${r_a}" "create net namespace"

# Create the remaining four namespaces.
for n in "${r_b}" "${r_w}" "${c_a}" "${c_b}"; do
	ip netns add "${n}"
done

# cleanup
# Delete the five test namespaces and remove the receive scratch file.
# Globals:   r_a, r_b, r_w, c_a, c_b (namespace names, read), rx (file, read)
# Arguments: none
cleanup() {
	# Keep the loop variable local so the script-level 'n' is not clobbered.
	local ns

	for ns in "${r_a}" "${r_b}" "${r_w}" "${c_a}" "${c_b}"; do
		ip netns del "${ns}"
	done
	rm -f "${rx}"
}

# From here on every exit path (normal end, the FAIL exit in test_path, the
# tunnel-setup skips) tears down the namespaces and the scratch file.
trap cleanup EXIT

# test_path LABEL
# Send 1400 bytes of UDP payload from Client A to 192.168.20.2:5000 (Client B)
# and check that exactly 1400 bytes were received.
# Globals:   c_a, c_b (namespace names, read), rx (scratch file, overwritten)
# Arguments: $1 - word inserted into the result message
# Outputs:   "OK: ..." or "FAIL: ..." on stdout
# Returns:   0 on success; exits the whole script with status 1 on failure
test_path() {
	local msg="$1"
	local bytes i listener

	# Listener in Client B's namespace; whatever it receives lands in $rx.
	ip netns exec "${c_b}" socat -t 3 - udp4-listen:5000,reuseaddr > "${rx}" < /dev/null &
	listener=$!

	# Give the listener a moment to start before sending.
	sleep 1
	# The payload is sent three times, yet exactly 1400 bytes are expected.
	# NOTE(review): presumably the listener only serves its first peer --
	# confirm against socat's udp4-listen semantics.
	for i in 1 2 3; do
		head -c1400 /dev/zero | tr "\000" "a" | \
			ip netns exec "${c_a}" socat -t 1 -u STDIN UDP:192.168.20.2:5000
	done

	# Wait for the listener only, not for unrelated background jobs.
	wait "${listener}"

	bytes=$(wc -c < "${rx}")

	if [ "${bytes}" -eq 1400 ]; then
		echo "OK: PMTU $msg connection tracking"
	else
		echo "FAIL: PMTU $msg connection tracking: got $bytes, expected 1400"
		exit 1
	fi
}

# Detailed setup for Router A
# ---------------------------
# Interfaces:
# veth0: 10.2.2.1/24           (towards Wanrouter)
# veth1: 192.168.10.1/24       (towards Client A)
# ipip0: No IP address, local 10.2.2.1 remote 10.4.4.1
# Routes:
# 192.168.20.0/24 dev ipip0    (192.168.20.0/24 is subnet of Client B)
# 10.4.4.0/24 via 10.2.2.254   (Router B's network via Wanrouter)
# No iptables rules at all.

# Router A: veth0 faces the WAN router, veth1 faces Client A.
ip link add veth0 netns "${r_a}" type veth peer name veth0 netns "${r_w}"
ip link add veth1 netns "${r_a}" type veth peer name veth0 netns "${c_a}"

# Tunnel endpoints from Router A's point of view.  Skip the test when the
# IPIP device cannot be created.
l_addr="10.2.2.1"
r_addr="10.4.4.1"
if ! ip netns exec "${r_a}" ip link add ipip0 type ipip local "${l_addr}" remote "${r_addr}" mode ipip; then
	exit $ksft_skip
fi

for dev in lo veth0 veth1 ipip0; do
	ip -net "${r_a}" link set "${dev}" up
done

ip -net "${r_a}" addr add 10.2.2.1/24 dev veth0
ip -net "${r_a}" addr add 192.168.10.1/24 dev veth1

# Client B's subnet is routed into the tunnel; Router B's network is reached
# through the WAN router.
ip -net "${r_a}" route add 192.168.20.0/24 dev ipip0
ip -net "${r_a}" route add 10.4.4.0/24 via 10.2.2.254

# Enable IPv4 forwarding in this namespace.
ip netns exec "${r_a}" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null

# Detailed setup for Router B
# ---------------------------
# Interfaces:
# veth0: 10.4.4.1/24           (towards Wanrouter)
# veth1: 192.168.20.1/24       (towards Client B)
# ipip0: No IP address, local 10.4.4.1 remote 10.2.2.1
# Routes:
# 192.168.10.0/24 dev ipip0    (192.168.10.0/24 is subnet of Client A)
# 10.2.2.0/24 via 10.4.4.254   (Router A's network via Wanrouter)
# No iptables rules at all.

# Router B: veth0 faces the WAN router (its veth1), veth1 faces Client B.
ip link add veth0 netns "${r_b}" type veth peer name veth1 netns "${r_w}"
ip link add veth1 netns "${r_b}" type veth peer name veth0 netns "${c_b}"

# Tunnel endpoints from Router B's point of view.  Skip the test when the
# IPIP device cannot be created.
l_addr="10.4.4.1"
r_addr="10.2.2.1"
if ! ip netns exec "${r_b}" ip link add ipip0 type ipip local "${l_addr}" remote "${r_addr}" mode ipip; then
	exit $ksft_skip
fi

for dev in lo veth0 veth1 ipip0; do
	ip -net "${r_b}" link set "${dev}" up
done

ip -net "${r_b}" addr add 10.4.4.1/24 dev veth0
ip -net "${r_b}" addr add 192.168.20.1/24 dev veth1

# Client A's subnet is routed into the tunnel; Router A's network is reached
# through the WAN router.
ip -net "${r_b}" route add 192.168.10.0/24 dev ipip0
ip -net "${r_b}" route add 10.2.2.0/24 via 10.4.4.254

# Enable IPv4 forwarding in this namespace.
ip netns exec "${r_b}" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null

# Client A: 192.168.10.2/24, default route via Router A (192.168.10.1).
ip -net "${c_a}" link set dev lo up
ip -net "${c_a}" link set dev veth0 up
ip -net "${c_a}" addr add 192.168.10.2/24 dev veth0
ip -net "${c_a}" route add default via 192.168.10.1

# Client B: 192.168.20.2/24, default route via Router B (192.168.20.1).
ip -net "${c_b}" link set dev lo up
ip -net "${c_b}" link set dev veth0 up
ip -net "${c_b}" addr add 192.168.20.2/24 dev veth0
ip -net "${c_b}" route add default via 192.168.20.1

# WAN router: sits between Router A (veth0) and Router B (veth1) and forwards
# between them.
ip netns exec "${r_w}" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null

ip -net "${r_w}" link set dev lo up
ip -net "${r_w}" addr add 10.2.2.254/24 dev veth0
ip -net "${r_w}" addr add 10.4.4.254/24 dev veth1

# MTU 1400 on both WAN links: the WAN router's ends ...
ip -net "${r_w}" link set dev veth0 up mtu 1400
ip -net "${r_w}" link set dev veth1 up mtu 1400

# ... and the routers' WAN-facing ends.
ip -net "${r_a}" link set dev veth0 mtu 1400
ip -net "${r_b}" link set dev veth0 mtu 1400

# Path MTU discovery
# ------------------
# Running tracepath from Client A to Client B shows PMTU discovery is working
# as expected:
#
# clienta:~# tracepath 192.168.20.2
# 1?: [LOCALHOST]                      pmtu 1500
# 1:  192.168.10.1                                          0.867ms
# 1:  192.168.10.1                                          0.302ms
# 2:  192.168.10.1                                          0.312ms pmtu 1480
# 2:  no reply
# 3:  192.168.10.1                                          0.510ms pmtu 1380
# 3:  192.168.20.2                                          2.320ms reached
# Resume: pmtu 1380 hops 3 back 3

# ip netns exec ${c_a} traceroute --mtu 192.168.20.2

# Router A has learned PMTU (1400) to Router B from Wanrouter.
# Client A has learned PMTU (1400 - IPIP overhead = 1380) to Client B
# from Router A.

#Send large UDP packet
#---------------------
#Now we send a 1400 bytes UDP packet from Client A to Client B:

# clienta:~# head -c1400 /dev/zero | tr "\000" "a" | socat -u STDIN UDP:192.168.20.2:5000
# Baseline run: no iptables rule has been added on Router A at this point.
test_path "without"

# The IPv4 stack on Client A already knows the PMTU to Client B, so the
# UDP packet is sent as two fragments (1380 + 20). Router A forwards the
# fragments between veth1 and ipip0. The fragments fit into the tunnel and
# reach their destination.

# When the large UDP packet is sent again after a conntrack rule has been
# added on Router A (see below), Router A reassembles the fragments before
# routing the packet over ipip0. The failure this test guards against: the
# resulting IPIP packet is too big (1400) for the tunnel PMTU (1380) to
# Router B and is dropped on Router A before sending.

# Append a FORWARD rule on Router A that matches on conntrack state and has
# no target, then repeat the transfer with connection tracking in the path.
ip netns exec ${r_a} iptables -A FORWARD -m conntrack --ctstate NEW
test_path "with"