From 7e3b6e7423d5f994257c1de88e06b509673fdbcf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 24 Oct 2015 05:47:44 -0700 Subject: ipv6: gre: support SIT encapsulation gre_gso_segment() chokes if SIT frames were aggregated by GRO engine. Fixes: 61c1db7fae21e ("ipv6: sit: add GSO/TSO support") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/gre_offload.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 5aa46d4b44ef..5a8ee3282550 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -36,7 +36,8 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, SKB_GSO_TCP_ECN | SKB_GSO_GRE | SKB_GSO_GRE_CSUM | - SKB_GSO_IPIP))) + SKB_GSO_IPIP | + SKB_GSO_SIT))) goto out; if (!skb->encapsulation) -- cgit v1.2.3-59-g8ed1b From c2229fe1430d4e1c70e36520229dd64a87802b20 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 27 Oct 2015 15:06:45 -0700 Subject: fib_trie: leaf_walk_rcu should not compute key if key is less than pn->key We were computing the child index in cases where the key value we were looking for was actually less than the base key of the tnode. As a result we were getting incorrect index values that would cause us to skip over some children. To fix this I have added a test that will force us to use child index 0 if the key we are looking for is less than the key of the current tnode. Fixes: 8be33e955cb9 ("fib_trie: Fib walk rcu should take a tnode and key instead of a trie and a leaf") Reported-by: Brian Rak Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 6c2af797f2f9..744e5936c10d 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1569,7 +1569,7 @@ static struct key_vector *leaf_walk_rcu(struct key_vector **tn, t_key key) do { /* record parent and next child index */ pn = n; - cindex = key ? get_index(key, pn) : 0; + cindex = (key > pn->key) ? get_index(key, pn) : 0; if (cindex >> pn->bits) break; -- cgit v1.2.3-59-g8ed1b From 74c16618137f1505b0a32dea3ec73a2ef6f8f842 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Sun, 25 Oct 2015 20:21:48 -0700 Subject: openvswitch: Fix double-free on ip_defrag() errors If ip_defrag() returns an error other than -EINPROGRESS, then the skb is freed. When handle_fragments() passes this back up to do_execute_actions(), it will be freed again. Prevent this double free by never freeing the skb in do_execute_actions() for errors returned by ovs_ct_execute. Always free it in ovs_ct_execute() error paths instead. Fixes: 7f8a436eaa2c ("openvswitch: Add conntrack action") Reported-by: Florian Westphal Signed-off-by: Joe Stringer Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/actions.c | 4 ++-- net/openvswitch/conntrack.c | 17 +++++++++++++---- net/openvswitch/conntrack.h | 1 + 3 files changed, 16 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 0bf0f406de52..dba635d086b2 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -1109,8 +1109,8 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, nla_data(a)); /* Hide stolen IP fragments from user space. */ - if (err == -EINPROGRESS) - return 0; + if (err) + return err == -EINPROGRESS ? 0 : err; break; } diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index a5ec34f8502f..b5dcc0abde66 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -293,6 +293,9 @@ static int ovs_ct_helper(struct sk_buff *skb, u16 proto) return helper->help(skb, protoff, ct, ctinfo); } +/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero + * value if 'skb' is freed. + */ static int handle_fragments(struct net *net, struct sw_flow_key *key, u16 zone, struct sk_buff *skb) { @@ -308,8 +311,8 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key, return err; ovs_cb.mru = IPCB(skb)->frag_max_size; - } else if (key->eth.type == htons(ETH_P_IPV6)) { #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) + } else if (key->eth.type == htons(ETH_P_IPV6)) { enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone; struct sk_buff *reasm; @@ -318,17 +321,18 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key, if (!reasm) return -EINPROGRESS; - if (skb == reasm) + if (skb == reasm) { + kfree_skb(skb); return -EINVAL; + } key->ip.proto = ipv6_hdr(reasm)->nexthdr; skb_morph(skb, reasm); consume_skb(reasm); ovs_cb.mru = IP6CB(skb)->frag_max_size; -#else - return -EPFNOSUPPORT; #endif } else { + kfree_skb(skb); return -EPFNOSUPPORT; } @@ -473,6 +477,9 @@ static bool labels_nonzero(const struct ovs_key_ct_labels *labels) return false; } +/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero + * value if 'skb' is freed. + */ int ovs_ct_execute(struct net *net, struct sk_buff *skb, struct sw_flow_key *key, const struct ovs_conntrack_info *info) @@ -508,6 +515,8 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb, &info->labels.mask); err: skb_push(skb, nh_ofs); + if (err) + kfree_skb(skb); return err; } diff --git a/net/openvswitch/conntrack.h b/net/openvswitch/conntrack.h index 82e0dfc66028..a7544f405c16 100644 --- a/net/openvswitch/conntrack.h +++ b/net/openvswitch/conntrack.h @@ -67,6 +67,7 @@ static inline int ovs_ct_execute(struct net *net, struct sk_buff *skb, struct sw_flow_key *key, const struct ovs_conntrack_info *info) { + kfree_skb(skb); return -ENOTSUPP; } -- cgit v1.2.3-59-g8ed1b From 190b8ffbb700a9aa47acc559779bc79c0cb14766 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Sun, 25 Oct 2015 20:21:49 -0700 Subject: ipv6: Export nf_ct_frag6_consume_orig() This is needed in openvswitch to fix an skb leak in the next patch. Signed-off-by: Joe Stringer Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/ipv6/netfilter/nf_conntrack_reasm.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 701cd2bae0a9..c7196ad1d69f 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -646,6 +646,7 @@ void nf_ct_frag6_consume_orig(struct sk_buff *skb) s = s2; } } +EXPORT_SYMBOL_GPL(nf_ct_frag6_consume_orig); static int nf_ct_net_init(struct net *net) { -- cgit v1.2.3-59-g8ed1b From 6f5cadee44d83395dcd78d557b577e1021e192e4 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Sun, 25 Oct 2015 20:21:50 -0700 Subject: openvswitch: Fix skb leak using IPv6 defrag nf_ct_frag6_gather() makes a clone of each skb passed to it, and if the reassembly is successful, expects the caller to free all of the original skbs using nf_ct_frag6_consume_orig(). This call was previously missing, meaning that the original fragments were never freed (with the exception of the last fragment to arrive). Fix this by ensuring that all original fragments except for the last fragment are freed via nf_ct_frag6_consume_orig(). The last fragment will be morphed into the head, so it must not be freed yet. Furthermore, retain the ->next pointer for the head after skb_morph(). Fixes: 7f8a436eaa2c ("openvswitch: Add conntrack action") Reported-by: Florian Westphal Signed-off-by: Joe Stringer Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/conntrack.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index b5dcc0abde66..50095820edb7 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -326,8 +326,15 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key, return -EINVAL; } + /* Don't free 'skb' even though it is one of the original + * fragments, as we're going to morph it into the head. + */ + skb_get(skb); + nf_ct_frag6_consume_orig(reasm); + key->ip.proto = ipv6_hdr(reasm)->nexthdr; skb_morph(skb, reasm); + skb->next = reasm->next; consume_skb(reasm); ovs_cb.mru = IP6CB(skb)->frag_max_size; #endif -- cgit v1.2.3-59-g8ed1b From 8ce675ff39b9958d1c10f86cf58e357efaafc856 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Mon, 26 Oct 2015 12:46:37 -0400 Subject: RDS-TCP: Recover correctly from pskb_pull()/pksb_trim() failure in rds_tcp_data_recv Either of pskb_pull() or pskb_trim() may fail under low memory conditions. If rds_tcp_data_recv() ignores such failures, the application will receive corrupted data because the skb has not been correctly carved to the RDS datagram size. Avoid this by handling pskb_pull/pskb_trim failure in the same manner as the skb_clone failure: bail out of rds_tcp_data_recv(), and retry via the deferred call to rds_send_worker() that gets set up on ENOMEM from rds_tcp_read_sock() Signed-off-by: Sowmini Varadhan Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/tcp_recv.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c index fbc5ef88bc0e..27a992154804 100644 --- a/net/rds/tcp_recv.c +++ b/net/rds/tcp_recv.c @@ -214,8 +214,15 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb, } to_copy = min(tc->t_tinc_data_rem, left); - pskb_pull(clone, offset); - pskb_trim(clone, to_copy); + if (!pskb_pull(clone, offset) || + pskb_trim(clone, to_copy)) { + pr_warn("rds_tcp_data_recv: pull/trim failed " + "left %zu data_rem %zu skb_len %d\n", + left, tc->t_tinc_data_rem, skb->len); + kfree_skb(clone); + desc->error = -ENOMEM; + goto out; + } skb_queue_tail(&tinc->ti_skb_list, clone); rdsdebug("skb %p data %p len %d off %u to_copy %zu -> " -- cgit v1.2.3-59-g8ed1b From 1e0d69a9cc9172d7896c2113f983a74f6e8ff303 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Wed, 28 Oct 2015 13:21:03 +0100 Subject: Revert "Merge branch 'ipv6-overflow-arith'" Linus dislikes these changes. To not hold up the net-merge let's revert it for now and fix the bug like Linus suggested. This reverts commit ec3661b42257d9a06cf0d318175623ac7a660113, reversing changes made to c80dbe04612986fd6104b4a1be21681b113b5ac9. Cc: Linus Torvalds Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/compiler-gcc.h | 4 ---- include/linux/overflow-arith.h | 18 ------------------ net/ipv6/ip6_output.c | 6 +----- 3 files changed, 1 insertion(+), 27 deletions(-) delete mode 100644 include/linux/overflow-arith.h (limited to 'net') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 82c159e0532a..dfaa7b3e9ae9 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -237,10 +237,6 @@ #define KASAN_ABI_VERSION 3 #endif -#if GCC_VERSION >= 50000 -#define CC_HAVE_BUILTIN_OVERFLOW -#endif - #endif /* gcc version >= 40000 specific checks */ #if !defined(__noclone) diff --git a/include/linux/overflow-arith.h b/include/linux/overflow-arith.h deleted file mode 100644 index e12ccf854a70..000000000000 --- a/include/linux/overflow-arith.h +++ /dev/null @@ -1,18 +0,0 @@ -#pragma once - -#include - -#ifdef CC_HAVE_BUILTIN_OVERFLOW - -#define overflow_usub __builtin_usub_overflow - -#else - -static inline bool overflow_usub(unsigned int a, unsigned int b, - unsigned int *res) -{ - *res = a - b; - return *res > a ? true : false; -} - -#endif diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 8dddb45c433e..d03d6da772f3 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -28,7 +28,6 @@ #include #include -#include #include #include #include @@ -585,10 +584,7 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb, if (np->frag_size) mtu = np->frag_size; } - - if (overflow_usub(mtu, hlen + sizeof(struct frag_hdr), &mtu) || - mtu <= 7) - goto fail_toobig; + mtu -= hlen + sizeof(struct frag_hdr); frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr, &ipv6_hdr(skb)->saddr); -- cgit v1.2.3-59-g8ed1b From 89bc7848a91bc99532f5c21b2885472ba710f249 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Wed, 28 Oct 2015 13:21:04 +0100 Subject: ipv6: protect mtu calculation of wrap-around and infinite loop by rounding issues Raw sockets with hdrincl enabled can insert ipv6 extension headers right into the data stream. In case we need to fragment those packets, we reparse the options header to find the place where we can insert the fragment header. If the extension headers exceed the link's MTU we actually cannot make progress in such a case. Instead of ending up in broken arithmetic or rounding towards 0 and entering an endless loop in ip6_fragment, just prevent those cases by aborting early and signal -EMSGSIZE to user space. This is the second version of the patch which doesn't use the overflow_usub function, which got reverted for now. Suggested-by: Linus Torvalds Cc: Linus Torvalds Reported-by: Dmitry Vyukov Cc: Dmitry Vyukov Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index d03d6da772f3..f84ec4e9b2de 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -584,6 +584,8 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb, if (np->frag_size) mtu = np->frag_size; } + if (mtu < hlen + sizeof(struct frag_hdr) + 8) + goto fail_toobig; mtu -= hlen + sizeof(struct frag_hdr); frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr, -- cgit v1.2.3-59-g8ed1b