aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/udp.c
diff options
context:
space:
mode:
authorStefano Brivio <sbrivio@redhat.com>2018-11-08 12:19:14 +0100
committerDavid S. Miller <davem@davemloft.net>2018-11-08 17:13:08 -0800
commita36e185e8c85523413c1ae3e03a0bdde5501f403 (patch)
tree74340b5913e7d77870196c5d1644efb36fb698fe /net/ipv4/udp.c
parentnet: hns3: fix spelling mistake, "assertting" -> "asserting" (diff)
downloadlinux-dev-a36e185e8c85523413c1ae3e03a0bdde5501f403.tar.xz
linux-dev-a36e185e8c85523413c1ae3e03a0bdde5501f403.zip
udp: Handle ICMP errors for tunnels with same destination port on both endpoints
For both IPv4 and IPv6, if we can't match errors to a socket, try tunnels before ignoring them. Look up a socket with the original source and destination ports as found in the UDP packet inside the ICMP payload, this will work for tunnels that force the same destination port for both endpoints, i.e. VXLAN and GENEVE. Actually, lwtunnels could break this assumption if they are configured by an external control plane to have different destination ports on the endpoints: in this case, we won't be able to trace ICMP messages back to them. For IPv6 redirect messages, call ip6_redirect() directly with the output interface argument set to the interface we received the packet from (as it's the very interface we should build the exception on), otherwise the new nexthop will be rejected. There's no such need for IPv4. Tunnels can now export an encap_err_lookup() operation that indicates a match. Pass the packet to the lookup function, and if the tunnel driver reports a matching association, continue with regular ICMP error handling. v2: - Added newline between network and transport header sets in __udp{4,6}_lib_err_encap() (David Miller) - Removed redundant skb_reset_network_header(skb); in __udp4_lib_err_encap() - Removed redundant reassignment of iph in __udp4_lib_err_encap() (Sabrina Dubroca) - Edited comment to __udp{4,6}_lib_err_encap() to reflect the fact this won't work with lwtunnels configured to use asymmetric ports. By the way, it's VXLAN, not VxLAN (Jiri Benc) Signed-off-by: Stefano Brivio <sbrivio@redhat.com> Reviewed-by: Sabrina Dubroca <sd@queasysnail.net> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/udp.c')
-rw-r--r--net/ipv4/udp.c79
1 files changed, 70 insertions, 9 deletions
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 3488650b90ac..ce759b61f6cd 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -583,6 +583,62 @@ static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk,
return true;
}
+DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key);
+void udp_encap_enable(void)
+{
+ static_branch_enable(&udp_encap_needed_key);
+}
+EXPORT_SYMBOL(udp_encap_enable);
+
+/* Try to match ICMP errors to UDP tunnels by looking up a socket without
+ * reversing source and destination port: this will match tunnels that force the
+ * same destination port on both endpoints (e.g. VXLAN, GENEVE). Note that
+ * lwtunnels might actually break this assumption by being configured with
+ * different destination ports on endpoints, in this case we won't be able to
+ * trace ICMP messages back to them.
+ *
+ * Then ask the tunnel implementation to match the error against a valid
+ * association.
+ *
+ * Return the socket if we have a match.
+ */
+static struct sock *__udp4_lib_err_encap(struct net *net,
+ const struct iphdr *iph,
+ struct udphdr *uh,
+ struct udp_table *udptable,
+ struct sk_buff *skb)
+{
+ int (*lookup)(struct sock *sk, struct sk_buff *skb);
+ int network_offset, transport_offset;
+ struct udp_sock *up;
+ struct sock *sk;
+
+ sk = __udp4_lib_lookup(net, iph->daddr, uh->source,
+ iph->saddr, uh->dest, skb->dev->ifindex, 0,
+ udptable, NULL);
+ if (!sk)
+ return NULL;
+
+ network_offset = skb_network_offset(skb);
+ transport_offset = skb_transport_offset(skb);
+
+ /* Network header needs to point to the outer IPv4 header inside ICMP */
+ skb_reset_network_header(skb);
+
+ /* Transport header needs to point to the UDP header */
+ skb_set_transport_header(skb, iph->ihl << 2);
+
+ up = udp_sk(sk);
+ lookup = READ_ONCE(up->encap_err_lookup);
+ if (!lookup || lookup(sk, skb))
+ sk = NULL;
+
+ skb_set_transport_header(skb, transport_offset);
+ skb_set_network_header(skb, network_offset);
+
+ return sk;
+}
+
/*
* This routine is called by the ICMP module when it gets some
* sort of error condition. If err < 0 then the socket should
@@ -601,6 +657,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
struct udphdr *uh = (struct udphdr *)(skb->data+(iph->ihl<<2));
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
+ bool tunnel = false;
struct sock *sk;
int harderr;
int err;
@@ -610,8 +667,15 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
iph->saddr, uh->source, skb->dev->ifindex,
inet_sdif(skb), udptable, NULL);
if (!sk) {
- __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
- return; /* No socket for error */
+ /* No socket for error: try tunnels before discarding */
+ if (static_branch_unlikely(&udp_encap_needed_key))
+ sk = __udp4_lib_err_encap(net, iph, uh, udptable, skb);
+
+ if (!sk) {
+ __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
+ return;
+ }
+ tunnel = true;
}
err = 0;
@@ -654,6 +718,10 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
* RFC1122: OK. Passes ICMP errors back to application, as per
* 4.1.3.3.
*/
+ if (tunnel) {
+ /* ...not for tunnels though: we don't have a sending socket */
+ goto out;
+ }
if (!inet->recverr) {
if (!harderr || sk->sk_state != TCP_ESTABLISHED)
goto out;
@@ -1891,13 +1959,6 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
return 0;
}
-DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key);
-void udp_encap_enable(void)
-{
- static_branch_enable(&udp_encap_needed_key);
-}
-EXPORT_SYMBOL(udp_encap_enable);
-
/* returns:
* -1: error
* 0: success