From 79a131592dbb81a2dba208622a2ffbfc53f28bc0 Mon Sep 17 00:00:00 2001 From: Peter Nørlund Date: Wed, 30 Sep 2015 10:12:22 +0200 Subject: ipv4: ICMP packet inspection for multipath MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ICMP packets are inspected to let them route together with the flow they belong to, minimizing the chance that a problematic path will affect flows on other paths, and so that anycast environments can work with ECMP. Signed-off-by: Peter Nørlund Signed-off-by: David S. Miller --- include/net/route.h | 11 +++++++++- net/ipv4/icmp.c | 19 ++++++++++++++++- net/ipv4/route.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++------- 3 files changed, 80 insertions(+), 9 deletions(-) diff --git a/include/net/route.h b/include/net/route.h index e211dc167db1..d32cb76f5302 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -113,7 +114,15 @@ struct in_device; int ip_rt_init(void); void rt_cache_flush(struct net *net); void rt_flush_dev(struct net_device *dev); -struct rtable *__ip_route_output_key(struct net *, struct flowi4 *flp); +struct rtable *__ip_route_output_key_hash(struct net *, struct flowi4 *flp, + int mp_hash); + +static inline struct rtable *__ip_route_output_key(struct net *net, + struct flowi4 *flp) +{ + return __ip_route_output_key_hash(net, flp, -1); +} + struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp, const struct sock *sk); struct dst_entry *ipv4_blackhole_route(struct net *net, diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 6b96dee2800b..36e26977c908 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -440,6 +440,22 @@ out_unlock: icmp_xmit_unlock(sk); } +#ifdef CONFIG_IP_ROUTE_MULTIPATH + +/* Source and destination is swapped. See ip_multipath_icmp_hash */ +static int icmp_multipath_hash_skb(const struct sk_buff *skb) +{ + const struct iphdr *iph = ip_hdr(skb); + + return fib_multipath_hash(iph->daddr, iph->saddr); +} + +#else + +#define icmp_multipath_hash_skb(skb) (-1) + +#endif + static struct rtable *icmp_route_lookup(struct net *net, struct flowi4 *fl4, struct sk_buff *skb_in, @@ -464,7 +480,8 @@ static struct rtable *icmp_route_lookup(struct net *net, fl4->flowi4_oif = l3mdev_master_ifindex(skb_in->dev); security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4)); - rt = __ip_route_output_key(net, fl4); + rt = __ip_route_output_key_hash(net, fl4, + icmp_multipath_hash_skb(skb_in)); if (IS_ERR(rt)) return rt; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 0cca44476b1e..54297d3a0559 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1651,6 +1651,48 @@ out: return err; } +#ifdef CONFIG_IP_ROUTE_MULTIPATH + +/* To make ICMP packets follow the right flow, the multipath hash is + * calculated from the inner IP addresses in reverse order. + */ +static int ip_multipath_icmp_hash(struct sk_buff *skb) +{ + const struct iphdr *outer_iph = ip_hdr(skb); + struct icmphdr _icmph; + const struct icmphdr *icmph; + struct iphdr _inner_iph; + const struct iphdr *inner_iph; + + if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0)) + goto standard_hash; + + icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph), + &_icmph); + if (!icmph) + goto standard_hash; + + if (icmph->type != ICMP_DEST_UNREACH && + icmph->type != ICMP_REDIRECT && + icmph->type != ICMP_TIME_EXCEEDED && + icmph->type != ICMP_PARAMETERPROB) { + goto standard_hash; + } + + inner_iph = skb_header_pointer(skb, + outer_iph->ihl * 4 + sizeof(_icmph), + sizeof(_inner_iph), &_inner_iph); + if (!inner_iph) + goto standard_hash; + + return fib_multipath_hash(inner_iph->daddr, inner_iph->saddr); + +standard_hash: + return fib_multipath_hash(outer_iph->saddr, outer_iph->daddr); +} + +#endif /* CONFIG_IP_ROUTE_MULTIPATH */ + static int ip_mkroute_input(struct sk_buff *skb, struct fib_result *res, const struct flowi4 *fl4, @@ -1661,7 +1703,10 @@ static int ip_mkroute_input(struct sk_buff *skb, if (res->fi && res->fi->fib_nhs > 1) { int h; - h = fib_multipath_hash(saddr, daddr); + if (unlikely(ip_hdr(skb)->protocol == IPPROTO_ICMP)) + h = ip_multipath_icmp_hash(skb); + else + h = fib_multipath_hash(saddr, daddr); fib_select_multipath(res, h); } #endif @@ -2030,7 +2075,8 @@ add: * Major route resolver routine. */ -struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) +struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, + int mp_hash) { struct net_device *dev_out = NULL; __u8 tos = RT_FL_TOS(fl4); @@ -2194,10 +2240,9 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) #ifdef CONFIG_IP_ROUTE_MULTIPATH if (res.fi->fib_nhs > 1 && fl4->flowi4_oif == 0) { - int h; - - h = fib_multipath_hash(fl4->saddr, fl4->daddr); - fib_select_multipath(&res, h); + if (mp_hash < 0) + mp_hash = fib_multipath_hash(fl4->saddr, fl4->daddr); + fib_select_multipath(&res, mp_hash); } else #endif @@ -2220,7 +2265,7 @@ out: rcu_read_unlock(); return rth; } -EXPORT_SYMBOL_GPL(__ip_route_output_key); +EXPORT_SYMBOL_GPL(__ip_route_output_key_hash); static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie) { -- cgit v1.2.3-59-g8ed1b