From 7e3b6e7423d5f994257c1de88e06b509673fdbcf Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 24 Oct 2015 05:47:44 -0700
Subject: ipv6: gre: support SIT encapsulation

gre_gso_segment() chokes if SIT frames were aggregated by GRO engine.

Fixes: 61c1db7fae21e ("ipv6: sit: add GSO/TSO support")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/gre_offload.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index 5aa46d4b44ef..5a8ee3282550 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -36,7 +36,8 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
 				  SKB_GSO_TCP_ECN |
 				  SKB_GSO_GRE |
 				  SKB_GSO_GRE_CSUM |
-				  SKB_GSO_IPIP)))
+				  SKB_GSO_IPIP |
+				  SKB_GSO_SIT)))
 		goto out;
 
 	if (!skb->encapsulation)
-- 
cgit v1.2.3-59-g8ed1b


From c2229fe1430d4e1c70e36520229dd64a87802b20 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <aduyck@mirantis.com>
Date: Tue, 27 Oct 2015 15:06:45 -0700
Subject: fib_trie: leaf_walk_rcu should not compute key if key is less than
 pn->key

We were computing the child index in cases where the key value we were
looking for was actually less than the base key of the tnode.  As a result
we were getting incorrect index values that would cause us to skip over
some children.

To fix this I have added a test that will force us to use child index 0 if
the key we are looking for is less than the key of the current tnode.

Fixes: 8be33e955cb9 ("fib_trie: Fib walk rcu should take a tnode and key instead of a trie and a leaf")
Reported-by: Brian Rak <brak@gameservers.com>
Signed-off-by: Alexander Duyck <aduyck@mirantis.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_trie.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 6c2af797f2f9..744e5936c10d 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1569,7 +1569,7 @@ static struct key_vector *leaf_walk_rcu(struct key_vector **tn, t_key key)
 	do {
 		/* record parent and next child index */
 		pn = n;
-		cindex = key ? get_index(key, pn) : 0;
+		cindex = (key > pn->key) ? get_index(key, pn) : 0;
 
 		if (cindex >> pn->bits)
 			break;
-- 
cgit v1.2.3-59-g8ed1b


From 74c16618137f1505b0a32dea3ec73a2ef6f8f842 Mon Sep 17 00:00:00 2001
From: Joe Stringer <joestringer@nicira.com>
Date: Sun, 25 Oct 2015 20:21:48 -0700
Subject: openvswitch: Fix double-free on ip_defrag() errors

If ip_defrag() returns an error other than -EINPROGRESS, then the skb is
freed. When handle_fragments() passes this back up to
do_execute_actions(), it will be freed again. Prevent this double free
by never freeing the skb in do_execute_actions() for errors returned by
ovs_ct_execute(). Always free it in ovs_ct_execute() error paths instead.

Fixes: 7f8a436eaa2c ("openvswitch: Add conntrack action")
Reported-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/actions.c   |  4 ++--
 net/openvswitch/conntrack.c | 17 +++++++++++++----
 net/openvswitch/conntrack.h |  1 +
 3 files changed, 16 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 0bf0f406de52..dba635d086b2 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -1109,8 +1109,8 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 					     nla_data(a));
 
 			/* Hide stolen IP fragments from user space. */
-			if (err == -EINPROGRESS)
-				return 0;
+			if (err)
+				return err == -EINPROGRESS ? 0 : err;
 			break;
 		}
 
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index a5ec34f8502f..b5dcc0abde66 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -293,6 +293,9 @@ static int ovs_ct_helper(struct sk_buff *skb, u16 proto)
 	return helper->help(skb, protoff, ct, ctinfo);
 }
 
+/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero
+ * value if 'skb' is freed.
+ */
 static int handle_fragments(struct net *net, struct sw_flow_key *key,
 			    u16 zone, struct sk_buff *skb)
 {
@@ -308,8 +311,8 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key,
 			return err;
 
 		ovs_cb.mru = IPCB(skb)->frag_max_size;
-	} else if (key->eth.type == htons(ETH_P_IPV6)) {
 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
+	} else if (key->eth.type == htons(ETH_P_IPV6)) {
 		enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone;
 		struct sk_buff *reasm;
 
@@ -318,17 +321,18 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key,
 		if (!reasm)
 			return -EINPROGRESS;
 
-		if (skb == reasm)
+		if (skb == reasm) {
+			kfree_skb(skb);
 			return -EINVAL;
+		}
 
 		key->ip.proto = ipv6_hdr(reasm)->nexthdr;
 		skb_morph(skb, reasm);
 		consume_skb(reasm);
 		ovs_cb.mru = IP6CB(skb)->frag_max_size;
-#else
-		return -EPFNOSUPPORT;
 #endif
 	} else {
+		kfree_skb(skb);
 		return -EPFNOSUPPORT;
 	}
 
@@ -473,6 +477,9 @@ static bool labels_nonzero(const struct ovs_key_ct_labels *labels)
 	return false;
 }
 
+/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero
+ * value if 'skb' is freed.
+ */
 int ovs_ct_execute(struct net *net, struct sk_buff *skb,
 		   struct sw_flow_key *key,
 		   const struct ovs_conntrack_info *info)
@@ -508,6 +515,8 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
 					&info->labels.mask);
 err:
 	skb_push(skb, nh_ofs);
+	if (err)
+		kfree_skb(skb);
 	return err;
 }
 
diff --git a/net/openvswitch/conntrack.h b/net/openvswitch/conntrack.h
index 82e0dfc66028..a7544f405c16 100644
--- a/net/openvswitch/conntrack.h
+++ b/net/openvswitch/conntrack.h
@@ -67,6 +67,7 @@ static inline int ovs_ct_execute(struct net *net, struct sk_buff *skb,
 				 struct sw_flow_key *key,
 				 const struct ovs_conntrack_info *info)
 {
+	kfree_skb(skb);
 	return -ENOTSUPP;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 190b8ffbb700a9aa47acc559779bc79c0cb14766 Mon Sep 17 00:00:00 2001
From: Joe Stringer <joestringer@nicira.com>
Date: Sun, 25 Oct 2015 20:21:49 -0700
Subject: ipv6: Export nf_ct_frag6_consume_orig()

This is needed in openvswitch to fix an skb leak in the next patch.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/netfilter/nf_conntrack_reasm.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 701cd2bae0a9..c7196ad1d69f 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -646,6 +646,7 @@ void nf_ct_frag6_consume_orig(struct sk_buff *skb)
 		s = s2;
 	}
 }
+EXPORT_SYMBOL_GPL(nf_ct_frag6_consume_orig);
 
 static int nf_ct_net_init(struct net *net)
 {
-- 
cgit v1.2.3-59-g8ed1b


From 6f5cadee44d83395dcd78d557b577e1021e192e4 Mon Sep 17 00:00:00 2001
From: Joe Stringer <joestringer@nicira.com>
Date: Sun, 25 Oct 2015 20:21:50 -0700
Subject: openvswitch: Fix skb leak using IPv6 defrag

nf_ct_frag6_gather() makes a clone of each skb passed to it, and if the
reassembly is successful, expects the caller to free all of the original
skbs using nf_ct_frag6_consume_orig(). This call was previously missing,
meaning that the original fragments were never freed (with the exception
of the last fragment to arrive).

Fix this by ensuring that all original fragments except for the last
fragment are freed via nf_ct_frag6_consume_orig(). The last fragment
will be morphed into the head, so it must not be freed yet. Furthermore,
retain the ->next pointer for the head after skb_morph().

Fixes: 7f8a436eaa2c ("openvswitch: Add conntrack action")
Reported-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/conntrack.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'net')

diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index b5dcc0abde66..50095820edb7 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -326,8 +326,15 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key,
 			return -EINVAL;
 		}
 
+		/* Don't free 'skb' even though it is one of the original
+		 * fragments, as we're going to morph it into the head.
+		 */
+		skb_get(skb);
+		nf_ct_frag6_consume_orig(reasm);
+
 		key->ip.proto = ipv6_hdr(reasm)->nexthdr;
 		skb_morph(skb, reasm);
+		skb->next = reasm->next;
 		consume_skb(reasm);
 		ovs_cb.mru = IP6CB(skb)->frag_max_size;
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From 8ce675ff39b9958d1c10f86cf58e357efaafc856 Mon Sep 17 00:00:00 2001
From: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Date: Mon, 26 Oct 2015 12:46:37 -0400
Subject: RDS-TCP: Recover correctly from pskb_pull()/pskb_trim() failure in
 rds_tcp_data_recv

Either of pskb_pull() or pskb_trim() may fail under low memory conditions.
If rds_tcp_data_recv() ignores such failures, the application will
receive corrupted data because the skb has not been correctly
carved to the RDS datagram size.

Avoid this by handling pskb_pull()/pskb_trim() failure in the same
manner as the skb_clone() failure: bail out of rds_tcp_data_recv(), and
retry via the deferred call to rds_send_worker() that gets set up on
ENOMEM from rds_tcp_read_sock().

Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/tcp_recv.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c
index fbc5ef88bc0e..27a992154804 100644
--- a/net/rds/tcp_recv.c
+++ b/net/rds/tcp_recv.c
@@ -214,8 +214,15 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
 			}
 
 			to_copy = min(tc->t_tinc_data_rem, left);
-			pskb_pull(clone, offset);
-			pskb_trim(clone, to_copy);
+			if (!pskb_pull(clone, offset) ||
+			    pskb_trim(clone, to_copy)) {
+				pr_warn("rds_tcp_data_recv: pull/trim failed "
+					"left %zu data_rem %zu skb_len %d\n",
+					left, tc->t_tinc_data_rem, skb->len);
+				kfree_skb(clone);
+				desc->error = -ENOMEM;
+				goto out;
+			}
 			skb_queue_tail(&tinc->ti_skb_list, clone);
 
 			rdsdebug("skb %p data %p len %d off %u to_copy %zu -> "
-- 
cgit v1.2.3-59-g8ed1b


From 1e0d69a9cc9172d7896c2113f983a74f6e8ff303 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Wed, 28 Oct 2015 13:21:03 +0100
Subject: Revert "Merge branch 'ipv6-overflow-arith'"

Linus dislikes these changes. To avoid holding up the net merge, let's
revert them for now and fix the bug as Linus suggested.

This reverts commit ec3661b42257d9a06cf0d318175623ac7a660113, reversing
changes made to c80dbe04612986fd6104b4a1be21681b113b5ac9.

Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/compiler-gcc.h   |  4 ----
 include/linux/overflow-arith.h | 18 ------------------
 net/ipv6/ip6_output.c          |  6 +-----
 3 files changed, 1 insertion(+), 27 deletions(-)
 delete mode 100644 include/linux/overflow-arith.h

(limited to 'net')

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 82c159e0532a..dfaa7b3e9ae9 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -237,10 +237,6 @@
 #define KASAN_ABI_VERSION 3
 #endif
 
-#if GCC_VERSION >= 50000
-#define CC_HAVE_BUILTIN_OVERFLOW
-#endif
-
 #endif	/* gcc version >= 40000 specific checks */
 
 #if !defined(__noclone)
diff --git a/include/linux/overflow-arith.h b/include/linux/overflow-arith.h
deleted file mode 100644
index e12ccf854a70..000000000000
--- a/include/linux/overflow-arith.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#pragma once
-
-#include <linux/kernel.h>
-
-#ifdef CC_HAVE_BUILTIN_OVERFLOW
-
-#define overflow_usub __builtin_usub_overflow
-
-#else
-
-static inline bool overflow_usub(unsigned int a, unsigned int b,
-				 unsigned int *res)
-{
-	*res = a - b;
-	return *res > a ? true : false;
-}
-
-#endif
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 8dddb45c433e..d03d6da772f3 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -28,7 +28,6 @@
 
 #include <linux/errno.h>
 #include <linux/kernel.h>
-#include <linux/overflow-arith.h>
 #include <linux/string.h>
 #include <linux/socket.h>
 #include <linux/net.h>
@@ -585,10 +584,7 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb,
 		if (np->frag_size)
 			mtu = np->frag_size;
 	}
-
-	if (overflow_usub(mtu, hlen + sizeof(struct frag_hdr), &mtu) ||
-	    mtu <= 7)
-		goto fail_toobig;
+	mtu -= hlen + sizeof(struct frag_hdr);
 
 	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
 				    &ipv6_hdr(skb)->saddr);
-- 
cgit v1.2.3-59-g8ed1b


From 89bc7848a91bc99532f5c21b2885472ba710f249 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Wed, 28 Oct 2015 13:21:04 +0100
Subject: ipv6: protect mtu calculation of wrap-around and infinite loop by
 rounding issues

Raw sockets with hdrincl enabled can insert ipv6 extension headers
right into the data stream. In case we need to fragment those packets,
we reparse the options header to find the place where we can insert
the fragment header. If the extension headers exceed the link's MTU we
actually cannot make progress in such a case.

Instead of ending up in broken arithmetic or rounding towards 0 and
entering an endless loop in ip6_fragment(), just prevent those cases by
aborting early and signalling -EMSGSIZE to user space.

This is the second version of the patch which doesn't use the
overflow_usub function, which got reverted for now.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_output.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index d03d6da772f3..f84ec4e9b2de 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -584,6 +584,8 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb,
 		if (np->frag_size)
 			mtu = np->frag_size;
 	}
+	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
+		goto fail_toobig;
 	mtu -= hlen + sizeof(struct frag_hdr);
 
 	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
-- 
cgit v1.2.3-59-g8ed1b