From 08dcdbf6a7b9d14c2302c5bd0c5390ddf122f664 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 Feb 2013 12:18:52 +0000 Subject: ipv6: use a stronger hash for tcp It looks like its possible to open thousands of TCP IPv6 sessions on a server, all landing in a single slot of TCP hash table. Incoming packets have to lookup sockets in a very long list. We should hash all bits from foreign IPv6 addresses, using a salt and hash mix, not a simple XOR. inet6_ehashfn() can also separately use the ports, instead of xoring them. Reported-by: Neal Cardwell Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Signed-off-by: David S. Miller --- include/net/inet6_hashtables.h | 8 ++++---- include/net/inet_sock.h | 1 + include/net/ipv6.h | 12 ++++++++++++ 3 files changed, 17 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index 7ca75cbbf75e..fd4ee016ba5c 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -28,16 +28,16 @@ struct inet_hashinfo; -/* I have no idea if this is a good hash for v6 or not. -DaveM */ static inline unsigned int inet6_ehashfn(struct net *net, const struct in6_addr *laddr, const u16 lport, const struct in6_addr *faddr, const __be16 fport) { - u32 ports = (lport ^ (__force u16)fport); + u32 ports = (((u32)lport) << 16) | (__force u32)fport; return jhash_3words((__force u32)laddr->s6_addr32[3], - (__force u32)faddr->s6_addr32[3], - ports, inet_ehash_secret + net_hash_mix(net)); + ipv6_addr_jhash(faddr), + ports, + inet_ehash_secret + net_hash_mix(net)); } static inline int inet6_sk_ehashfn(const struct sock *sk) diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index a4196cbc84ec..7235ae73a1e8 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -203,6 +203,7 @@ static inline void inet_sk_copy_descendant(struct sock *sk_to, extern int inet_sk_rebuild_header(struct sock *sk); extern u32 inet_ehash_secret; +extern u32 ipv6_hash_secret; extern void build_ehash_secret(void); static inline unsigned int inet_ehashfn(struct net *net, diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 851d5412a299..64d12e77719a 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -15,6 +15,7 @@ #include #include +#include #include #include #include @@ -514,6 +515,17 @@ static inline u32 ipv6_addr_hash(const struct in6_addr *a) #endif } +/* more secured version of ipv6_addr_hash() */ +static inline u32 ipv6_addr_jhash(const struct in6_addr *a) +{ + u32 v = (__force u32)a->s6_addr32[0] ^ (__force u32)a->s6_addr32[1]; + + return jhash_3words(v, + (__force u32)a->s6_addr32[2], + (__force u32)a->s6_addr32[3], + ipv6_hash_secret); +} + static inline bool ipv6_addr_loopback(const struct in6_addr *a) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 -- cgit v1.2.3-59-g8ed1b From 5b0520425e5ea81ba95ec486dd6bbb59a09fff0e Mon Sep 17 00:00:00 2001 From: Li Wei Date: Thu, 21 Feb 2013 22:18:44 +0000 Subject: ipv4: fix error handling in icmp_protocol. Now we handle icmp errors in each transport protocol's err_handler, for icmp protocols, that is ping_err. Since this handler only care of those icmp errors triggered by echo request, errors triggered by echo reply(which sent by kernel) are sliently ignored. So wrap ping_err() with icmp_err() to deal with those icmp errors. Signed-off-by: Li Wei Signed-off-by: David S. Miller --- include/net/icmp.h | 1 + net/ipv4/af_inet.c | 2 +- net/ipv4/icmp.c | 23 +++++++++++++++++++++++ 3 files changed, 25 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/icmp.h b/include/net/icmp.h index 9ac2524d1402..081439fd070e 100644 --- a/include/net/icmp.h +++ b/include/net/icmp.h @@ -41,6 +41,7 @@ struct net; extern void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info); extern int icmp_rcv(struct sk_buff *skb); +extern void icmp_err(struct sk_buff *, u32 info); extern int icmp_init(void); extern void icmp_out_count(struct net *net, unsigned char type); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 9cbcb94a4c6d..15847e19b7dd 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1577,7 +1577,7 @@ static const struct net_offload udp_offload = { static const struct net_protocol icmp_protocol = { .handler = icmp_rcv, - .err_handler = ping_err, + .err_handler = icmp_err, .no_policy = 1, .netns_ok = 1, }; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 17ff9fd7cdda..3ac5dff79627 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -934,6 +934,29 @@ error: goto drop; } +void icmp_err(struct sk_buff *skb, u32 info) +{ + struct iphdr *iph = (struct iphdr *)skb->data; + struct icmphdr *icmph = (struct icmphdr *)(skb->data+(iph->ihl<<2)); + int type = icmp_hdr(skb)->type; + int code = icmp_hdr(skb)->code; + struct net *net = dev_net(skb->dev); + + /* + * Use ping_err to handle all icmp errors except those + * triggered by ICMP_ECHOREPLY which sent from kernel. + */ + if (icmph->type != ICMP_ECHOREPLY) { + ping_err(skb, info); + return; + } + + if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) + ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_ICMP, 0); + else if (type == ICMP_REDIRECT) + ipv4_redirect(skb, net, 0, 0, IPPROTO_ICMP, 0); +} + /* * This table is the definition of how we handle ICMP. */ -- cgit v1.2.3-59-g8ed1b From 4cfb04854d053e4d6391d7f84495f48082342362 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 22 Feb 2013 07:43:35 +0000 Subject: net: fix possible deadlock in sum_frag_mem_limit Dave Jones reported a lockdep splat occurring in IP defrag code. commit 6d7b857d541ecd1d (net: use lib/percpu_counter API for fragmentation mem accounting) added a possible deadlock. Because percpu_counter_sum_positive() needs to acquire a lock that can be used from softirq, we need to disable BH in sum_frag_mem_limit() Reported-by: Dave Jones Signed-off-by: Eric Dumazet Cc: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/net/inet_frag.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 3f237db0a426..76c3fe5ecc2e 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -114,7 +114,13 @@ static inline void init_frag_mem_limit(struct netns_frags *nf) static inline int sum_frag_mem_limit(struct netns_frags *nf) { - return percpu_counter_sum_positive(&nf->mem); + int res; + + local_bh_disable(); + res = percpu_counter_sum_positive(&nf->mem); + local_bh_enable(); + + return res; } static inline void inet_frag_lru_move(struct inet_frag_queue *q) -- cgit v1.2.3-59-g8ed1b From 490ab08127cebc25e3a260a74556b56ce5f47c0f Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Fri, 22 Feb 2013 07:30:30 +0000 Subject: IP_GRE: Fix IP-Identification. GRE-GSO generates ip fragments with id 0,2,3,4... for every GSO packet, which is not correct. Following patch fixes it by setting ip-header id unique id of fragments are allowed. As Eric Dumazet suggested it is optimized by using inner ip-header whenever inner packet is ipv4. Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/net/ipip.h | 17 +++++++++++++++++ net/ipv4/af_inet.c | 6 ++++-- net/ipv4/ip_gre.c | 3 ++- 3 files changed, 23 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/ipip.h b/include/net/ipip.h index 21947cf4fa46..fd19625ff99d 100644 --- a/include/net/ipip.h +++ b/include/net/ipip.h @@ -71,4 +71,21 @@ static inline void iptunnel_xmit(struct sk_buff *skb, struct net_device *dev) } } +static inline void tunnel_ip_select_ident(struct sk_buff *skb, + const struct iphdr *old_iph, + struct dst_entry *dst) +{ + struct iphdr *iph = ip_hdr(skb); + + if (iph->frag_off & htons(IP_DF)) + iph->id = 0; + else { + /* Use inner packet iph-id if possible. */ + if (skb->protocol == htons(ETH_P_IP) && old_iph->id) + iph->id = old_iph->id; + else + __ip_select_ident(iph, dst, + (skb_shinfo(skb)->gso_segs ?: 1) - 1); + } +} #endif diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 15847e19b7dd..68f6a94f7661 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1332,8 +1332,10 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, if (skb->next != NULL) iph->frag_off |= htons(IP_MF); offset += (skb->len - skb->mac_len - iph->ihl * 4); - } else - iph->id = htons(id++); + } else { + if (!(iph->frag_off & htons(IP_DF))) + iph->id = htons(id++); + } iph->tot_len = htons(skb->len - skb->mac_len); iph->check = 0; iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 5ef4da780ac1..b8bada00d516 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -970,7 +970,8 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev iph->daddr = fl4.daddr; iph->saddr = fl4.saddr; iph->ttl = ttl; - iph->id = 0; + + tunnel_ip_select_ident(skb, old_iph, &rt->dst); if (ttl == 0) { if (skb->protocol == htons(ETH_P_IP)) -- cgit v1.2.3-59-g8ed1b