From 19d7df69fdb2636856dc8919de72fc1bf8f79598 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Thu, 1 Feb 2018 08:49:23 +0100
Subject: xfrm: Refuse to insert 32 bit userspace socket policies on 64 bit
 systems

We don't have a compat layer for xfrm, so userspace and kernel
structures have different sizes in this case. This results in
a broken configuration, so refuse to configure socket policies
when trying to insert from 32 bit userspace as we do it already
with policies inserted via netlink.

Reported-and-tested-by: syzbot+e1a1577ca8bcb47b769a@syzkaller.appspotmail.com
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_state.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'net')

diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 54e21f19d722..f9d2f2233f09 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -2056,6 +2056,11 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen
 	struct xfrm_mgr *km;
 	struct xfrm_policy *pol = NULL;
 
+#ifdef CONFIG_COMPAT
+	if (in_compat_syscall())
+		return -EOPNOTSUPP;
+#endif
+
 	if (!optval && !optlen) {
 		xfrm_sk_policy_insert(sk, XFRM_POLICY_IN, NULL);
 		xfrm_sk_policy_insert(sk, XFRM_POLICY_OUT, NULL);
-- 
cgit v1.2.3-59-g8ed1b


From 2471c98165494173a3cd03231b216b909c063e41 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Thu, 1 Feb 2018 11:26:12 +0100
Subject: xfrm: Fix policy hold queue after flowcache removal.

Now that the flowcache is removed we need to generate
a new dummy bundle every time we check if the needed
SAs are in place because the dummy bundle is not cached
anymore. Fix it by passing the XFRM_LOOKUP_QUEUE flag
to xfrm_lookup(). This makes sure that we get a dummy
bundle in case the SAs are not yet in place.

Fixes: 3ca28286ea80 ("xfrm_policy: bypass flow_cache_lookup")
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_policy.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 7a23078132cf..8b3811ff002d 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1891,7 +1891,7 @@ static void xfrm_policy_queue_process(struct timer_list *t)
 	spin_unlock(&pq->hold_queue.lock);
 
 	dst_hold(xfrm_dst_path(dst));
-	dst = xfrm_lookup(net, xfrm_dst_path(dst), &fl, sk, 0);
+	dst = xfrm_lookup(net, xfrm_dst_path(dst), &fl, sk, XFRM_LOOKUP_QUEUE);
 	if (IS_ERR(dst))
 		goto purge_queue;
 
-- 
cgit v1.2.3-59-g8ed1b


From d97ca5d714a5334aecadadf696875da40f1fbf3e Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 12 Feb 2018 14:42:01 +0100
Subject: xfrm_user: uncoditionally validate esn replay attribute struct

The sanity test added in ecd7918745234 can be bypassed, validation
only occurs if XFRM_STATE_ESN flag is set, but rest of code doesn't care
and just checks if the attribute itself is present.

So always validate.  Alternative is to reject if we have the attribute
without the flag but that would change abi.

Reported-by: syzbot+0ab777c27d2bb7588f73@syzkaller.appspotmail.com
Cc: Mathias Krause <minipli@googlemail.com>
Fixes: ecd7918745234 ("xfrm_user: ensure user supplied esn replay window is valid")
Fixes: d8647b79c3b7e ("xfrm: Add user interface for esn and big anti-replay windows")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_user.c | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 7f52b8eb177d..080035f056d9 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -121,22 +121,17 @@ static inline int verify_replay(struct xfrm_usersa_info *p,
 	struct nlattr *rt = attrs[XFRMA_REPLAY_ESN_VAL];
 	struct xfrm_replay_state_esn *rs;
 
-	if (p->flags & XFRM_STATE_ESN) {
-		if (!rt)
-			return -EINVAL;
+	if (!rt)
+		return (p->flags & XFRM_STATE_ESN) ? -EINVAL : 0;
 
-		rs = nla_data(rt);
+	rs = nla_data(rt);
 
-		if (rs->bmp_len > XFRMA_REPLAY_ESN_MAX / sizeof(rs->bmp[0]) / 8)
-			return -EINVAL;
-
-		if (nla_len(rt) < (int)xfrm_replay_state_esn_len(rs) &&
-		    nla_len(rt) != sizeof(*rs))
-			return -EINVAL;
-	}
+	if (rs->bmp_len > XFRMA_REPLAY_ESN_MAX / sizeof(rs->bmp[0]) / 8)
+		return -EINVAL;
 
-	if (!rt)
-		return 0;
+	if (nla_len(rt) < (int)xfrm_replay_state_esn_len(rs) &&
+	    nla_len(rt) != sizeof(*rs))
+		return -EINVAL;
 
 	/* As only ESP and AH support ESN feature. */
 	if ((p->id.proto != IPPROTO_ESP) && (p->id.proto != IPPROTO_AH))
-- 
cgit v1.2.3-59-g8ed1b


From 510c321b557121861601f9d259aadd65aa274f35 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Wed, 14 Feb 2018 19:06:02 +0800
Subject: xfrm: reuse uncached_list to track xdsts

In early time, when freeing a xdst, it would be inserted into
dst_garbage.list first. Then if it's refcnt was still held
somewhere, later it would be put into dst_busy_list in
dst_gc_task().

When one dev was being unregistered, the dev of these dsts in
dst_busy_list would be set with loopback_dev and put this dev.
So that this dev's removal wouldn't get blocked, and avoid the
kmsg warning:

  kernel:unregister_netdevice: waiting for veth0 to become \
  free. Usage count = 2

However after Commit 52df157f17e5 ("xfrm: take refcnt of dst
when creating struct xfrm_dst bundle"), the xdst will not be
freed with dst gc, and this warning happens.

To fix it, we need to find these xdsts that are still held by
others when removing the dev, and free xdst's dev and set it
with loopback_dev.

But unfortunately after flow_cache for xfrm was deleted, no
list tracks them anymore. So we need to save these xdsts
somewhere to release the xdst's dev later.

To make this easier, this patch is to reuse uncached_list to
track xdsts, so that the dev refcnt can be released in the
event NETDEV_UNREGISTER process of fib_netdev_notifier.

Thanks to Florian, we could move forward this fix quickly.

Fixes: 52df157f17e5 ("xfrm: take refcnt of dst when creating struct xfrm_dst bundle")
Reported-by: Jianlin Shi <jishi@redhat.com>
Reported-by: Hangbin Liu <liuhangbin@gmail.com>
Tested-by: Eyal Birger <eyal.birger@gmail.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/ip6_route.h |  3 +++
 include/net/route.h     |  3 +++
 net/ipv4/route.c        | 21 +++++++++++++--------
 net/ipv4/xfrm4_policy.c |  4 +++-
 net/ipv6/route.c        |  4 ++--
 net/ipv6/xfrm6_policy.c |  5 +++++
 6 files changed, 29 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 27d23a65f3cd..ac0866bb9e93 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -179,6 +179,9 @@ void rt6_disable_ip(struct net_device *dev, unsigned long event);
 void rt6_sync_down_dev(struct net_device *dev, unsigned long event);
 void rt6_multipath_rebalance(struct rt6_info *rt);
 
+void rt6_uncached_list_add(struct rt6_info *rt);
+void rt6_uncached_list_del(struct rt6_info *rt);
+
 static inline const struct rt6_info *skb_rt6_info(const struct sk_buff *skb)
 {
 	const struct dst_entry *dst = skb_dst(skb);
diff --git a/include/net/route.h b/include/net/route.h
index 1eb9ce470e25..40b870d58f38 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -227,6 +227,9 @@ struct in_ifaddr;
 void fib_add_ifaddr(struct in_ifaddr *);
 void fib_del_ifaddr(struct in_ifaddr *, struct in_ifaddr *);
 
+void rt_add_uncached_list(struct rtable *rt);
+void rt_del_uncached_list(struct rtable *rt);
+
 static inline void ip_rt_put(struct rtable *rt)
 {
 	/* dst_release() accepts a NULL parameter.
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 49cc1c1df1ba..1d1e4abe04b0 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1383,7 +1383,7 @@ struct uncached_list {
 
 static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list);
 
-static void rt_add_uncached_list(struct rtable *rt)
+void rt_add_uncached_list(struct rtable *rt)
 {
 	struct uncached_list *ul = raw_cpu_ptr(&rt_uncached_list);
 
@@ -1394,14 +1394,8 @@ static void rt_add_uncached_list(struct rtable *rt)
 	spin_unlock_bh(&ul->lock);
 }
 
-static void ipv4_dst_destroy(struct dst_entry *dst)
+void rt_del_uncached_list(struct rtable *rt)
 {
-	struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);
-	struct rtable *rt = (struct rtable *) dst;
-
-	if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt))
-		kfree(p);
-
 	if (!list_empty(&rt->rt_uncached)) {
 		struct uncached_list *ul = rt->rt_uncached_list;
 
@@ -1411,6 +1405,17 @@ static void ipv4_dst_destroy(struct dst_entry *dst)
 	}
 }
 
+static void ipv4_dst_destroy(struct dst_entry *dst)
+{
+	struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);
+	struct rtable *rt = (struct rtable *)dst;
+
+	if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt))
+		kfree(p);
+
+	rt_del_uncached_list(rt);
+}
+
 void rt_flush_dev(struct net_device *dev)
 {
 	struct net *net = dev_net(dev);
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 05017e2c849c..8d33f7b311f4 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -102,6 +102,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
 	xdst->u.rt.rt_pmtu = rt->rt_pmtu;
 	xdst->u.rt.rt_table_id = rt->rt_table_id;
 	INIT_LIST_HEAD(&xdst->u.rt.rt_uncached);
+	rt_add_uncached_list(&xdst->u.rt);
 
 	return 0;
 }
@@ -241,7 +242,8 @@ static void xfrm4_dst_destroy(struct dst_entry *dst)
 	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
 
 	dst_destroy_metrics_generic(dst);
-
+	if (xdst->u.rt.rt_uncached_list)
+		rt_del_uncached_list(&xdst->u.rt);
 	xfrm_dst_destroy(xdst);
 }
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index fb2d251c0500..38b75e9d6eae 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -128,7 +128,7 @@ struct uncached_list {
 
 static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
 
-static void rt6_uncached_list_add(struct rt6_info *rt)
+void rt6_uncached_list_add(struct rt6_info *rt)
 {
 	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
 
@@ -139,7 +139,7 @@ static void rt6_uncached_list_add(struct rt6_info *rt)
 	spin_unlock_bh(&ul->lock);
 }
 
-static void rt6_uncached_list_del(struct rt6_info *rt)
+void rt6_uncached_list_del(struct rt6_info *rt)
 {
 	if (!list_empty(&rt->rt6i_uncached)) {
 		struct uncached_list *ul = rt->rt6i_uncached_list;
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 09fb44ee3b45..416fe67271a9 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -113,6 +113,9 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
 	xdst->u.rt6.rt6i_gateway = rt->rt6i_gateway;
 	xdst->u.rt6.rt6i_dst = rt->rt6i_dst;
 	xdst->u.rt6.rt6i_src = rt->rt6i_src;
+	INIT_LIST_HEAD(&xdst->u.rt6.rt6i_uncached);
+	rt6_uncached_list_add(&xdst->u.rt6);
+	atomic_inc(&dev_net(dev)->ipv6.rt6_stats->fib_rt_uncache);
 
 	return 0;
 }
@@ -244,6 +247,8 @@ static void xfrm6_dst_destroy(struct dst_entry *dst)
 	if (likely(xdst->u.rt6.rt6i_idev))
 		in6_dev_put(xdst->u.rt6.rt6i_idev);
 	dst_destroy_metrics_generic(dst);
+	if (xdst->u.rt6.rt6i_uncached_list)
+		rt6_uncached_list_del(&xdst->u.rt6);
 	xfrm_dst_destroy(xdst);
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 143a4454daaf0e80a2b9f37159a0d6d2b61e64ed Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sat, 17 Feb 2018 15:16:22 +0800
Subject: xfrm: do not call rcu_read_unlock when afinfo is NULL in xfrm_get_tos

When xfrm_policy_get_afinfo returns NULL, it will not hold rcu
read lock. In this case, rcu_read_unlock should not be called
in xfrm_get_tos, just like other places where it's calling
xfrm_policy_get_afinfo.

Fixes: f5e2bb4f5b22 ("xfrm: policy: xfrm_get_tos cannot fail")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_policy.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 8b3811ff002d..150d46633ce6 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1458,10 +1458,13 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl,
 static int xfrm_get_tos(const struct flowi *fl, int family)
 {
 	const struct xfrm_policy_afinfo *afinfo;
-	int tos = 0;
+	int tos;
 
 	afinfo = xfrm_policy_get_afinfo(family);
-	tos = afinfo ? afinfo->get_tos(fl) : 0;
+	if (!afinfo)
+		return 0;
+
+	tos = afinfo->get_tos(fl);
 
 	rcu_read_unlock();
 
-- 
cgit v1.2.3-59-g8ed1b


From 013cb81e89f8a70deef086ca29a923faf5585ab0 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Mon, 19 Feb 2018 07:44:07 +0100
Subject: xfrm: Fix infinite loop in xfrm_get_dst_nexthop with transport mode.

On transport mode we forget to fetch the child dst_entry
before we continue the while loop, this leads to an infinite
loop. Fix this by fetching the child dst_entry before we
continue the while loop.

Fixes: 0f6c480f23f4 ("xfrm: Move dst->path into struct xfrm_dst")
Reported-by: syzbot+7d03c810e50aaedef98a@syzkaller.appspotmail.com
Tested-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_policy.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 150d46633ce6..625b3fca5704 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2732,14 +2732,14 @@ static const void *xfrm_get_dst_nexthop(const struct dst_entry *dst,
 	while (dst->xfrm) {
 		const struct xfrm_state *xfrm = dst->xfrm;
 
+		dst = xfrm_dst_child(dst);
+
 		if (xfrm->props.mode == XFRM_MODE_TRANSPORT)
 			continue;
 		if (xfrm->type->flags & XFRM_TYPE_REMOTE_COADDR)
 			daddr = xfrm->coaddr;
 		else if (!(xfrm->type->flags & XFRM_TYPE_LOCAL_COADDR))
 			daddr = &xfrm->id.daddr;
-
-		dst = xfrm_dst_child(dst);
 	}
 	return daddr;
 }
-- 
cgit v1.2.3-59-g8ed1b


From b8b549eec8187ac1b12075d69a2d84d89b5e811a Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Wed, 28 Feb 2018 09:23:19 +0100
Subject: xfrm: Fix ESN sequence number handling for IPsec GSO packets.

When IPsec offloading was introduced, we accidentally incremented
the sequence number counter on the xfrm_state by one packet
too much in the ESN case. This leads to a sequence number gap of
one packet after each GSO packet. Fix this by setting the sequence
number to the correct value.

Fixes: d7dbefc45cf5 ("xfrm: Add xfrm_replay_overflow functions for offloading")
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_replay.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index 1d38c6acf8af..9e3a5e85f828 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -660,7 +660,7 @@ static int xfrm_replay_overflow_offload_esn(struct xfrm_state *x, struct sk_buff
 		} else {
 			XFRM_SKB_CB(skb)->seq.output.low = oseq + 1;
 			XFRM_SKB_CB(skb)->seq.output.hi = oseq_hi;
-			xo->seq.low = oseq = oseq + 1;
+			xo->seq.low = oseq + 1;
 			xo->seq.hi = oseq_hi;
 			oseq += skb_shinfo(skb)->gso_segs;
 		}
-- 
cgit v1.2.3-59-g8ed1b


From 64e759f58f128730b97a3c3a26d283c075ad7c86 Mon Sep 17 00:00:00 2001
From: Szymon Janc <szymon.janc@codecoup.pl>
Date: Mon, 26 Feb 2018 15:41:53 +0100
Subject: Bluetooth: Fix missing encryption refresh on Security Request

If Security Request is received on connection that is already encrypted
with sufficient security master should perform encryption key refresh
procedure instead of just ignoring Slave Security Request
(Core Spec 5.0 Vol 3 Part H 2.4.6).

> ACL Data RX: Handle 3585 flags 0x02 dlen 6
      SMP: Security Request (0x0b) len 1
        Authentication requirement: Bonding, No MITM, SC, No Keypresses (0x09)
< HCI Command: LE Start Encryption (0x08|0x0019) plen 28
        Handle: 3585
        Random number: 0x0000000000000000
        Encrypted diversifier: 0x0000
        Long term key: 44264272a5c426a9e868f034cf0e69f3
> HCI Event: Command Status (0x0f) plen 4
      LE Start Encryption (0x08|0x0019) ncmd 1
        Status: Success (0x00)
> HCI Event: Encryption Key Refresh Complete (0x30) plen 3
        Status: Success (0x00)
        Handle: 3585

Signed-off-by: Szymon Janc <szymon.janc@codecoup.pl>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/smp.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 01117ae84f1d..a2ddae2f37d7 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -2296,8 +2296,14 @@ static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb)
 	else
 		sec_level = authreq_to_seclevel(auth);
 
-	if (smp_sufficient_security(hcon, sec_level, SMP_USE_LTK))
+	if (smp_sufficient_security(hcon, sec_level, SMP_USE_LTK)) {
+		/* If link is already encrypted with sufficient security we
+		 * still need refresh encryption as per Core Spec 5.0 Vol 3,
+		 * Part H 2.4.6
+		 */
+		smp_ltk_encrypt(conn, hcon->sec_level);
 		return 0;
+	}
 
 	if (sec_level > hcon->pending_sec_level)
 		hcon->pending_sec_level = sec_level;
-- 
cgit v1.2.3-59-g8ed1b


From d02f51cbcf12b09ab945873e35046045875eed9a Mon Sep 17 00:00:00 2001
From: Daniel Axtens <dja@axtens.net>
Date: Sat, 3 Mar 2018 03:03:46 +0100
Subject: bpf: fix bpf_skb_adjust_net/bpf_skb_proto_xlat to deal with gso sctp
 skbs

SCTP GSO skbs have a gso_size of GSO_BY_FRAGS, so any sort of
unconditionally mangling of that will result in nonsense value
and would corrupt the skb later on.

Therefore, i) add two helpers skb_increase_gso_size() and
skb_decrease_gso_size() that would throw a one time warning and
bail out for such skbs and ii) refuse and return early with an
error in those BPF helpers that are affected. We do need to bail
out as early as possible from there before any changes on the
skb have been performed.

Fixes: 6578171a7ff0 ("bpf: add bpf_skb_change_proto helper")
Co-authored-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Daniel Axtens <dja@axtens.net>
Cc: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 Documentation/networking/segmentation-offloads.txt | 11 +++-
 include/linux/skbuff.h                             | 22 ++++++++
 net/core/filter.c                                  | 60 +++++++++++++++-------
 3 files changed, 73 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/Documentation/networking/segmentation-offloads.txt b/Documentation/networking/segmentation-offloads.txt
index d47480b61ac6..23a8dd91a9ec 100644
--- a/Documentation/networking/segmentation-offloads.txt
+++ b/Documentation/networking/segmentation-offloads.txt
@@ -153,8 +153,15 @@ To signal this, gso_size is set to the special value GSO_BY_FRAGS.
 
 Therefore, any code in the core networking stack must be aware of the
 possibility that gso_size will be GSO_BY_FRAGS and handle that case
-appropriately. (For size checks, the skb_gso_validate_*_len family of
-helpers do this automatically.)
+appropriately.
+
+There are a couple of helpers to make this easier:
+
+ - For size checks, the skb_gso_validate_*_len family of helpers correctly
+   considers GSO_BY_FRAGS.
+
+ - For manipulating packets, skb_increase_gso_size and skb_decrease_gso_size
+   will check for GSO_BY_FRAGS and WARN if asked to manipulate these skbs.
 
 This also affects drivers with the NETIF_F_FRAGLIST & NETIF_F_GSO_SCTP bits
 set. Note also that NETIF_F_GSO_SCTP is included in NETIF_F_GSO_SOFTWARE.
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index c1e66bdcf583..8c67c33f40c9 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -4038,6 +4038,12 @@ static inline bool skb_is_gso_v6(const struct sk_buff *skb)
 	return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6;
 }
 
+/* Note: Should be called only if skb_is_gso(skb) is true */
+static inline bool skb_is_gso_sctp(const struct sk_buff *skb)
+{
+	return skb_shinfo(skb)->gso_type & SKB_GSO_SCTP;
+}
+
 static inline void skb_gso_reset(struct sk_buff *skb)
 {
 	skb_shinfo(skb)->gso_size = 0;
@@ -4045,6 +4051,22 @@ static inline void skb_gso_reset(struct sk_buff *skb)
 	skb_shinfo(skb)->gso_type = 0;
 }
 
+static inline void skb_increase_gso_size(struct skb_shared_info *shinfo,
+					 u16 increment)
+{
+	if (WARN_ON_ONCE(shinfo->gso_size == GSO_BY_FRAGS))
+		return;
+	shinfo->gso_size += increment;
+}
+
+static inline void skb_decrease_gso_size(struct skb_shared_info *shinfo,
+					 u16 decrement)
+{
+	if (WARN_ON_ONCE(shinfo->gso_size == GSO_BY_FRAGS))
+		return;
+	shinfo->gso_size -= decrement;
+}
+
 void __skb_warn_lro_forwarding(const struct sk_buff *skb);
 
 static inline bool skb_warn_if_lro(const struct sk_buff *skb)
diff --git a/net/core/filter.c b/net/core/filter.c
index 0c121adbdbaa..48aa7c7320db 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2087,6 +2087,10 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
 	u32 off = skb_mac_header_len(skb);
 	int ret;
 
+	/* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */
+	if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb)))
+		return -ENOTSUPP;
+
 	ret = skb_cow(skb, len_diff);
 	if (unlikely(ret < 0))
 		return ret;
@@ -2096,19 +2100,21 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
 		return ret;
 
 	if (skb_is_gso(skb)) {
+		struct skb_shared_info *shinfo = skb_shinfo(skb);
+
 		/* SKB_GSO_TCPV4 needs to be changed into
 		 * SKB_GSO_TCPV6.
 		 */
-		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) {
-			skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV4;
-			skb_shinfo(skb)->gso_type |=  SKB_GSO_TCPV6;
+		if (shinfo->gso_type & SKB_GSO_TCPV4) {
+			shinfo->gso_type &= ~SKB_GSO_TCPV4;
+			shinfo->gso_type |=  SKB_GSO_TCPV6;
 		}
 
 		/* Due to IPv6 header, MSS needs to be downgraded. */
-		skb_shinfo(skb)->gso_size -= len_diff;
+		skb_decrease_gso_size(shinfo, len_diff);
 		/* Header must be checked, and gso_segs recomputed. */
-		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
-		skb_shinfo(skb)->gso_segs = 0;
+		shinfo->gso_type |= SKB_GSO_DODGY;
+		shinfo->gso_segs = 0;
 	}
 
 	skb->protocol = htons(ETH_P_IPV6);
@@ -2123,6 +2129,10 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
 	u32 off = skb_mac_header_len(skb);
 	int ret;
 
+	/* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */
+	if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb)))
+		return -ENOTSUPP;
+
 	ret = skb_unclone(skb, GFP_ATOMIC);
 	if (unlikely(ret < 0))
 		return ret;
@@ -2132,19 +2142,21 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
 		return ret;
 
 	if (skb_is_gso(skb)) {
+		struct skb_shared_info *shinfo = skb_shinfo(skb);
+
 		/* SKB_GSO_TCPV6 needs to be changed into
 		 * SKB_GSO_TCPV4.
 		 */
-		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) {
-			skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV6;
-			skb_shinfo(skb)->gso_type |=  SKB_GSO_TCPV4;
+		if (shinfo->gso_type & SKB_GSO_TCPV6) {
+			shinfo->gso_type &= ~SKB_GSO_TCPV6;
+			shinfo->gso_type |=  SKB_GSO_TCPV4;
 		}
 
 		/* Due to IPv4 header, MSS can be upgraded. */
-		skb_shinfo(skb)->gso_size += len_diff;
+		skb_increase_gso_size(shinfo, len_diff);
 		/* Header must be checked, and gso_segs recomputed. */
-		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
-		skb_shinfo(skb)->gso_segs = 0;
+		shinfo->gso_type |= SKB_GSO_DODGY;
+		shinfo->gso_segs = 0;
 	}
 
 	skb->protocol = htons(ETH_P_IP);
@@ -2243,6 +2255,10 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff)
 	u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
 	int ret;
 
+	/* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */
+	if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb)))
+		return -ENOTSUPP;
+
 	ret = skb_cow(skb, len_diff);
 	if (unlikely(ret < 0))
 		return ret;
@@ -2252,11 +2268,13 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff)
 		return ret;
 
 	if (skb_is_gso(skb)) {
+		struct skb_shared_info *shinfo = skb_shinfo(skb);
+
 		/* Due to header grow, MSS needs to be downgraded. */
-		skb_shinfo(skb)->gso_size -= len_diff;
+		skb_decrease_gso_size(shinfo, len_diff);
 		/* Header must be checked, and gso_segs recomputed. */
-		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
-		skb_shinfo(skb)->gso_segs = 0;
+		shinfo->gso_type |= SKB_GSO_DODGY;
+		shinfo->gso_segs = 0;
 	}
 
 	return 0;
@@ -2267,6 +2285,10 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff)
 	u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
 	int ret;
 
+	/* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */
+	if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb)))
+		return -ENOTSUPP;
+
 	ret = skb_unclone(skb, GFP_ATOMIC);
 	if (unlikely(ret < 0))
 		return ret;
@@ -2276,11 +2298,13 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff)
 		return ret;
 
 	if (skb_is_gso(skb)) {
+		struct skb_shared_info *shinfo = skb_shinfo(skb);
+
 		/* Due to header shrink, MSS can be upgraded. */
-		skb_shinfo(skb)->gso_size += len_diff;
+		skb_increase_gso_size(shinfo, len_diff);
 		/* Header must be checked, and gso_segs recomputed. */
-		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
-		skb_shinfo(skb)->gso_segs = 0;
+		shinfo->gso_type |= SKB_GSO_DODGY;
+		shinfo->gso_segs = 0;
 	}
 
 	return 0;
-- 
cgit v1.2.3-59-g8ed1b


From 74c12c630fe310eb7fcae1b292257d47781fff0a Mon Sep 17 00:00:00 2001
From: Linus Lüssing <linus.luessing@c0d3.blue>
Date: Sun, 4 Mar 2018 13:08:17 +0100
Subject: batman-adv: Fix multicast packet loss with a single WANT_ALL_IPV4/6
 flag
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As the kernel doc describes too the code is supposed to skip adding
multicast TT entries if both the WANT_ALL_IPV4 and WANT_ALL_IPV6 flags
are present.

Unfortunately, the current code even skips adding multicast TT entries
if only either the WANT_ALL_IPV4 or WANT_ALL_IPV6 is present.

This could lead to IPv6 multicast packet loss if only an IGMP but not an
MLD querier is present for instance or vice versa.

Fixes: 687937ab3489 ("batman-adv: Add multicast optimization support for bridged setups")
Signed-off-by: Linus Lüssing <linus.luessing@c0d3.blue>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/multicast.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index cbdeb47ec3f6..d70640135e3a 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -543,8 +543,8 @@ update:
 		bat_priv->mcast.enabled = true;
 	}
 
-	return !(mcast_data.flags &
-		 (BATADV_MCAST_WANT_ALL_IPV4 | BATADV_MCAST_WANT_ALL_IPV6));
+	return !(mcast_data.flags & BATADV_MCAST_WANT_ALL_IPV4 &&
+		 mcast_data.flags & BATADV_MCAST_WANT_ALL_IPV6);
 }
 
 /**
-- 
cgit v1.2.3-59-g8ed1b


From 0d3601d2994a4ad9edf0b44f2ea34c68ff494ea5 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 5 Mar 2018 16:07:38 +0100
Subject: netfilter: nft_set_hash: skip fixed hash if timeout is specified

Fixed hash supports to timeouts, so skip it. Otherwise, userspace hits
EOPNOTSUPP.

Fixes: 6c03ae210ce3 ("netfilter: nft_set_hash: add non-resizable hashtable implementation")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_set_hash.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 3f1624ee056f..d40591fe1b2f 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -674,7 +674,7 @@ static const struct nft_set_ops *
 nft_hash_select_ops(const struct nft_ctx *ctx, const struct nft_set_desc *desc,
 		    u32 flags)
 {
-	if (desc->size) {
+	if (desc->size && !(flags & NFT_SET_TIMEOUT)) {
 		switch (desc->klen) {
 		case 4:
 			return &nft_hash_fast_ops;
-- 
cgit v1.2.3-59-g8ed1b


From 2cbb4ea7de167b02ffa63e9cdfdb07a7e7094615 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Fri, 16 Feb 2018 11:03:03 -0800
Subject: net: Only honor ifindex in IP_PKTINFO if non-0

Only allow ifindex from IP_PKTINFO to override SO_BINDTODEVICE settings
if the index is actually set in the message.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_sockglue.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 9c41a0cef1a5..74c962b9b09c 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -258,7 +258,8 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
 			src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg);
 			if (!ipv6_addr_v4mapped(&src_info->ipi6_addr))
 				return -EINVAL;
-			ipc->oif = src_info->ipi6_ifindex;
+			if (src_info->ipi6_ifindex)
+				ipc->oif = src_info->ipi6_ifindex;
 			ipc->addr = src_info->ipi6_addr.s6_addr32[3];
 			continue;
 		}
@@ -288,7 +289,8 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
 			if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct in_pktinfo)))
 				return -EINVAL;
 			info = (struct in_pktinfo *)CMSG_DATA(cmsg);
-			ipc->oif = info->ipi_ifindex;
+			if (info->ipi_ifindex)
+				ipc->oif = info->ipi_ifindex;
 			ipc->addr = info->ipi_spec_dst.s_addr;
 			break;
 		}
-- 
cgit v1.2.3-59-g8ed1b


From 87cdf3148b11d46382dbce2754ae7036aba96380 Mon Sep 17 00:00:00 2001
From: Yossi Kuperman <yossiku@mellanox.com>
Date: Sun, 4 Mar 2018 21:03:52 +0200
Subject: xfrm: Verify MAC header exists before overwriting
 eth_hdr(skb)->h_proto

Artem Savkov reported that commit 5efec5c655dd leads to a packet loss under
IPSec configuration. It appears that his setup consists of a TUN device,
which does not have a MAC header.

Make sure MAC header exists.

Note: TUN device sets a MAC header pointer, although it does not have one.

Fixes: 5efec5c655dd ("xfrm: Fix eth_hdr(skb)->h_proto to reflect inner IP version")
Reported-by: Artem Savkov <artem.savkov@gmail.com>
Tested-by: Artem Savkov <artem.savkov@gmail.com>
Signed-off-by: Yossi Kuperman <yossiku@mellanox.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/ipv4/xfrm4_mode_tunnel.c | 3 ++-
 net/ipv6/xfrm6_mode_tunnel.c | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index 63faeee989a9..2a9764bd1719 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -92,7 +92,8 @@ static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 
 	skb_reset_network_header(skb);
 	skb_mac_header_rebuild(skb);
-	eth_hdr(skb)->h_proto = skb->protocol;
+	if (skb->mac_len)
+		eth_hdr(skb)->h_proto = skb->protocol;
 
 	err = 0;
 
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index bb935a3b7fea..de1b0b8c53b0 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -92,7 +92,8 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 
 	skb_reset_network_header(skb);
 	skb_mac_header_rebuild(skb);
-	eth_hdr(skb)->h_proto = skb->protocol;
+	if (skb->mac_len)
+		eth_hdr(skb)->h_proto = skb->protocol;
 
 	err = 0;
 
-- 
cgit v1.2.3-59-g8ed1b


From 35d889d10b649fda66121891ec05eca88150059d Mon Sep 17 00:00:00 2001
From: Alexey Kodanev <alexey.kodanev@oracle.com>
Date: Mon, 5 Mar 2018 20:52:54 +0300
Subject: sch_netem: fix skb leak in netem_enqueue()

When we exceed current packets limit and we have more than one
segment in the list returned by skb_gso_segment(), netem drops
only the first one, skipping the rest, hence kmemleak reports:

unreferenced object 0xffff880b5d23b600 (size 1024):
  comm "softirq", pid 0, jiffies 4384527763 (age 2770.629s)
  hex dump (first 32 bytes):
    00 80 23 5d 0b 88 ff ff 00 00 00 00 00 00 00 00  ..#]............
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<00000000d8a19b9d>] __alloc_skb+0xc9/0x520
    [<000000001709b32f>] skb_segment+0x8c8/0x3710
    [<00000000c7b9bb88>] tcp_gso_segment+0x331/0x1830
    [<00000000c921cba1>] inet_gso_segment+0x476/0x1370
    [<000000008b762dd4>] skb_mac_gso_segment+0x1f9/0x510
    [<000000002182660a>] __skb_gso_segment+0x1dd/0x620
    [<00000000412651b9>] netem_enqueue+0x1536/0x2590 [sch_netem]
    [<0000000005d3b2a9>] __dev_queue_xmit+0x1167/0x2120
    [<00000000fc5f7327>] ip_finish_output2+0x998/0xf00
    [<00000000d309e9d3>] ip_output+0x1aa/0x2c0
    [<000000007ecbd3a4>] tcp_transmit_skb+0x18db/0x3670
    [<0000000042d2a45f>] tcp_write_xmit+0x4d4/0x58c0
    [<0000000056a44199>] tcp_tasklet_func+0x3d9/0x540
    [<0000000013d06d02>] tasklet_action+0x1ca/0x250
    [<00000000fcde0b8b>] __do_softirq+0x1b4/0x5a3
    [<00000000e7ed027c>] irq_exit+0x1e2/0x210

Fix it by adding the rest of the segments, if any, to skb 'to_free'
list. Add new __qdisc_drop_all() and qdisc_drop_all() functions
because they can be useful in the future if we need to drop segmented
GSO packets in other places.

Fixes: 6071bd1aa13e ("netem: Segment GSO packets on enqueue")
Signed-off-by: Alexey Kodanev <alexey.kodanev@oracle.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 19 +++++++++++++++++++
 net/sched/sch_netem.c     |  2 +-
 2 files changed, 20 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index e2ab13687fb9..2092d33194dd 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -824,6 +824,16 @@ static inline void __qdisc_drop(struct sk_buff *skb, struct sk_buff **to_free)
 	*to_free = skb;
 }
 
+static inline void __qdisc_drop_all(struct sk_buff *skb,
+				    struct sk_buff **to_free)
+{
+	if (skb->prev)
+		skb->prev->next = *to_free;
+	else
+		skb->next = *to_free;
+	*to_free = skb;
+}
+
 static inline unsigned int __qdisc_queue_drop_head(struct Qdisc *sch,
 						   struct qdisc_skb_head *qh,
 						   struct sk_buff **to_free)
@@ -956,6 +966,15 @@ static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch,
 	return NET_XMIT_DROP;
 }
 
+static inline int qdisc_drop_all(struct sk_buff *skb, struct Qdisc *sch,
+				 struct sk_buff **to_free)
+{
+	__qdisc_drop_all(skb, to_free);
+	qdisc_qstats_drop(sch);
+
+	return NET_XMIT_DROP;
+}
+
 /* Length to Time (L2T) lookup in a qdisc_rate_table, to determine how
    long it will take to send a packet given its size.
  */
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 7c179addebcd..7d6801fc5340 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -509,7 +509,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 	}
 
 	if (unlikely(sch->q.qlen >= sch->limit))
-		return qdisc_drop(skb, sch, to_free);
+		return qdisc_drop_all(skb, sch, to_free);
 
 	qdisc_qstats_backlog_inc(sch, skb);
 
-- 
cgit v1.2.3-59-g8ed1b


From e9fa1495d738e34fcec88a3d2ec9101a9ee5b310 Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Tue, 6 Mar 2018 11:10:19 +0100
Subject: ipv6: Reflect MTU changes on PMTU of exceptions for MTU-less routes

Currently, administrative MTU changes on a given netdevice are
not reflected on route exceptions for MTU-less routes, with a
set PMTU value, for that device:

 # ip -6 route get 2001:db8::b
 2001:db8::b from :: dev vti_a proto kernel src 2001:db8::a metric 256 pref medium
 # ping6 -c 1 -q -s10000 2001:db8::b > /dev/null
 # ip netns exec a ip -6 route get 2001:db8::b
 2001:db8::b from :: dev vti_a src 2001:db8::a metric 0
     cache expires 571sec mtu 4926 pref medium
 # ip link set dev vti_a mtu 3000
 # ip -6 route get 2001:db8::b
 2001:db8::b from :: dev vti_a src 2001:db8::a metric 0
     cache expires 571sec mtu 4926 pref medium
 # ip link set dev vti_a mtu 9000
 # ip -6 route get 2001:db8::b
 2001:db8::b from :: dev vti_a src 2001:db8::a metric 0
     cache expires 571sec mtu 4926 pref medium

The first issue is that since commit fb56be83e43d ("net-ipv6: on
device mtu change do not add mtu to mtu-less routes") we don't
call rt6_exceptions_update_pmtu() from rt6_mtu_change_route(),
which handles administrative MTU changes, if the regular route
is MTU-less.

However, PMTU exceptions should be always updated, as long as
RTAX_MTU is not locked. Keep the check for MTU-less main route,
as introduced by that commit, but, for exceptions,
call rt6_exceptions_update_pmtu() regardless of that check.

Once that is fixed, one problem remains: MTU changes are not
reflected if the new MTU is higher than the previous one,
because rt6_exceptions_update_pmtu() doesn't allow that. We
should instead allow PMTU increase if the old PMTU matches the
local MTU, as that implies that the old MTU was the lowest in the
path, and PMTU discovery might lead to different results.

The existing check in rt6_mtu_change_route() correctly took that
case into account (for regular routes only), so factor it out
and re-use it also in rt6_exceptions_update_pmtu().

While at it, fix comments style and grammar, and try to be a bit
more descriptive.

Reported-by: Xiumei Mu <xmu@redhat.com>
Fixes: fb56be83e43d ("net-ipv6: on device mtu change do not add mtu to mtu-less routes")
Fixes: f5bbe7ee79c2 ("ipv6: prepare rt6_mtu_change() for exception table")
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Acked-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 71 +++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 42 insertions(+), 29 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 9dcfadddd800..0db4218c9186 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1509,7 +1509,30 @@ static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt)
 	}
 }
 
-static void rt6_exceptions_update_pmtu(struct rt6_info *rt, int mtu)
+static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
+					 struct rt6_info *rt, int mtu)
+{
+	/* If the new MTU is lower than the route PMTU, this new MTU will be the
+	 * lowest MTU in the path: always allow updating the route PMTU to
+	 * reflect PMTU decreases.
+	 *
+	 * If the new MTU is higher, and the route PMTU is equal to the local
+	 * MTU, this means the old MTU is the lowest in the path, so allow
+	 * updating it: if other nodes now have lower MTUs, PMTU discovery will
+	 * handle this.
+	 */
+
+	if (dst_mtu(&rt->dst) >= mtu)
+		return true;
+
+	if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
+		return true;
+
+	return false;
+}
+
+static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
+				       struct rt6_info *rt, int mtu)
 {
 	struct rt6_exception_bucket *bucket;
 	struct rt6_exception *rt6_ex;
@@ -1518,20 +1541,22 @@ static void rt6_exceptions_update_pmtu(struct rt6_info *rt, int mtu)
 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
 					lockdep_is_held(&rt6_exception_lock));
 
-	if (bucket) {
-		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
-			hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
-				struct rt6_info *entry = rt6_ex->rt6i;
-				/* For RTF_CACHE with rt6i_pmtu == 0
-				 * (i.e. a redirected route),
-				 * the metrics of its rt->dst.from has already
-				 * been updated.
-				 */
-				if (entry->rt6i_pmtu && entry->rt6i_pmtu > mtu)
-					entry->rt6i_pmtu = mtu;
-			}
-			bucket++;
+	if (!bucket)
+		return;
+
+	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
+		hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
+			struct rt6_info *entry = rt6_ex->rt6i;
+
+			/* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
+			 * route), the metrics of its rt->dst.from have already
+			 * been updated.
+			 */
+			if (entry->rt6i_pmtu &&
+			    rt6_mtu_change_route_allowed(idev, entry, mtu))
+				entry->rt6i_pmtu = mtu;
 		}
+		bucket++;
 	}
 }
 
@@ -3809,25 +3834,13 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
 	   Since RFC 1981 doesn't include administrative MTU increase
 	   update PMTU increase is a MUST. (i.e. jumbo frame)
 	 */
-	/*
-	   If new MTU is less than route PMTU, this new MTU will be the
-	   lowest MTU in the path, update the route PMTU to reflect PMTU
-	   decreases; if new MTU is greater than route PMTU, and the
-	   old MTU is the lowest MTU in the path, update the route PMTU
-	   to reflect the increase. In this case if the other nodes' MTU
-	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
-	   PMTU discovery.
-	 */
 	if (rt->dst.dev == arg->dev &&
-	    dst_metric_raw(&rt->dst, RTAX_MTU) &&
 	    !dst_metric_locked(&rt->dst, RTAX_MTU)) {
 		spin_lock_bh(&rt6_exception_lock);
-		if (dst_mtu(&rt->dst) >= arg->mtu ||
-		    (dst_mtu(&rt->dst) < arg->mtu &&
-		     dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
+		if (dst_metric_raw(&rt->dst, RTAX_MTU) &&
+		    rt6_mtu_change_route_allowed(idev, rt, arg->mtu))
 			dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
-		}
-		rt6_exceptions_update_pmtu(rt, arg->mtu);
+		rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
 		spin_unlock_bh(&rt6_exception_lock);
 	}
 	return 0;
-- 
cgit v1.2.3-59-g8ed1b


From a560002437d3646dafccecb1bf32d1685112ddda Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 6 Mar 2018 18:46:39 +0300
Subject: net: Fix hlist corruptions in inet_evict_bucket()

inet_evict_bucket() iterates global list, and
several tasks may call it in parallel. All of
them hash the same fq->list_evictor to different
lists, which leads to list corruption.

This patch makes fq be hashed to expired list
only if this has not been made yet by another
task. Since inet_frag_alloc() allocates fq
using kmem_cache_zalloc(), we may rely on
list_evictor is initially unhashed.

The problem seems to exist before async
pernet_operations, as there was possible to have
exit method to be executed in parallel with
inet_frags::frags_work, so I add two Fixes tags.
This also may go to stable.

Fixes: d1fe19444d82 "inet: frag: don't re-use chainlist for evictor"
Fixes: f84c6821aa54 "net: Convert pernet_subsys, registered from inet_init()"
Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_fragment.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 26a3d0315728..e8ec28999f5c 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -119,6 +119,9 @@ out:
 
 static bool inet_fragq_should_evict(const struct inet_frag_queue *q)
 {
+	if (!hlist_unhashed(&q->list_evictor))
+		return false;
+
 	return q->net->low_thresh == 0 ||
 	       frag_mem_limit(q->net) >= q->net->low_thresh;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 17cfe79a65f98abe535261856c5aef14f306dff7 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 6 Mar 2018 07:54:53 -0800
Subject: l2tp: do not accept arbitrary sockets

syzkaller found an issue caused by lack of sufficient checks
in l2tp_tunnel_create()

RAW sockets can not be considered as UDP ones for instance.

In another patch, we shall replace all pr_err() by less intrusive
pr_debug() so that syzkaller can find other bugs faster.
Acked-by: Guillaume Nault <g.nault@alphalink.fr>
Acked-by: James Chapman <jchapman@katalix.com>

==================================================================
BUG: KASAN: slab-out-of-bounds in setup_udp_tunnel_sock+0x3ee/0x5f0 net/ipv4/udp_tunnel.c:69
dst_release: dst:00000000d53d0d0f refcnt:-1
Write of size 1 at addr ffff8801d013b798 by task syz-executor3/6242

CPU: 1 PID: 6242 Comm: syz-executor3 Not tainted 4.16.0-rc2+ #253
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:17 [inline]
 dump_stack+0x194/0x24d lib/dump_stack.c:53
 print_address_description+0x73/0x250 mm/kasan/report.c:256
 kasan_report_error mm/kasan/report.c:354 [inline]
 kasan_report+0x23b/0x360 mm/kasan/report.c:412
 __asan_report_store1_noabort+0x17/0x20 mm/kasan/report.c:435
 setup_udp_tunnel_sock+0x3ee/0x5f0 net/ipv4/udp_tunnel.c:69
 l2tp_tunnel_create+0x1354/0x17f0 net/l2tp/l2tp_core.c:1596
 pppol2tp_connect+0x14b1/0x1dd0 net/l2tp/l2tp_ppp.c:707
 SYSC_connect+0x213/0x4a0 net/socket.c:1640
 SyS_connect+0x24/0x30 net/socket.c:1621
 do_syscall_64+0x280/0x940 arch/x86/entry/common.c:287
 entry_SYSCALL_64_after_hwframe+0x42/0xb7

Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_core.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 83421c6f0bef..e22512e32827 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1457,9 +1457,14 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
 		encap = cfg->encap;
 
 	/* Quick sanity checks */
+	err = -EPROTONOSUPPORT;
+	if (sk->sk_type != SOCK_DGRAM) {
+		pr_debug("tunl %hu: fd %d wrong socket type\n",
+			 tunnel_id, fd);
+		goto err;
+	}
 	switch (encap) {
 	case L2TP_ENCAPTYPE_UDP:
-		err = -EPROTONOSUPPORT;
 		if (sk->sk_protocol != IPPROTO_UDP) {
 			pr_err("tunl %hu: fd %d wrong protocol, got %d, expected %d\n",
 			       tunnel_id, fd, sk->sk_protocol, IPPROTO_UDP);
@@ -1467,7 +1472,6 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
 		}
 		break;
 	case L2TP_ENCAPTYPE_IP:
-		err = -EPROTONOSUPPORT;
 		if (sk->sk_protocol != IPPROTO_L2TP) {
 			pr_err("tunl %hu: fd %d wrong protocol, got %d, expected %d\n",
 			       tunnel_id, fd, sk->sk_protocol, IPPROTO_L2TP);
-- 
cgit v1.2.3-59-g8ed1b


From 67f93df79aeefc3add4e4b31a752600f834236e2 Mon Sep 17 00:00:00 2001
From: Alexey Kodanev <alexey.kodanev@oracle.com>
Date: Tue, 6 Mar 2018 22:57:01 +0300
Subject: dccp: check sk for closed state in dccp_sendmsg()

dccp_disconnect() sets 'dp->dccps_hc_tx_ccid' tx handler to NULL,
therefore if DCCP socket is disconnected and dccp_sendmsg() is
called after it, it will cause a NULL pointer dereference in
dccp_write_xmit().

This crash and the reproducer was reported by syzbot. Looks like
it is reproduced if commit 69c64866ce07 ("dccp: CVE-2017-8824:
use-after-free in DCCP code") is applied.

Reported-by: syzbot+f99ab3887ab65d70f816@syzkaller.appspotmail.com
Signed-off-by: Alexey Kodanev <alexey.kodanev@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/proto.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'net')

diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 15bdc002d90c..84cd4e3fd01b 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -794,6 +794,11 @@ int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	if (skb == NULL)
 		goto out_release;
 
+	if (sk->sk_state == DCCP_CLOSED) {
+		rc = -ENOTCONN;
+		goto out_discard;
+	}
+
 	skb_reserve(skb, sk->sk_prot->max_header);
 	rc = memcpy_from_msg(skb_put(skb, len), msg, len);
 	if (rc != 0)
-- 
cgit v1.2.3-59-g8ed1b


From e05836ac07c77dd90377f8c8140bce2a44af5fe7 Mon Sep 17 00:00:00 2001
From: Soheil Hassas Yeganeh <soheil@google.com>
Date: Tue, 6 Mar 2018 17:15:12 -0500
Subject: tcp: purge write queue upon aborting the connection

When the connection is aborted, there is no point in
keeping the packets on the write queue until the connection
is closed.

Similar to a27fd7a8ed38 ('tcp: purge write queue upon RST'),
this is essential for a correct MSG_ZEROCOPY implementation,
because userspace cannot call close(fd) before receiving
zerocopy signals even when the connection is aborted.

Fixes: f214f915e7db ("tcp: enable MSG_ZEROCOPY")
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c       | 1 +
 net/ipv4/tcp_timer.c | 1 +
 2 files changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 48636aee23c3..8b8059b7af4d 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3566,6 +3566,7 @@ int tcp_abort(struct sock *sk, int err)
 
 	bh_unlock_sock(sk);
 	local_bh_enable();
+	tcp_write_queue_purge(sk);
 	release_sock(sk);
 	return 0;
 }
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 71fc60f1b326..f7d944855f8e 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -34,6 +34,7 @@ static void tcp_write_err(struct sock *sk)
 	sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
 	sk->sk_error_report(sk);
 
+	tcp_write_queue_purge(sk);
 	tcp_done(sk);
 	__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONTIMEOUT);
 }
-- 
cgit v1.2.3-59-g8ed1b


From b51f26b14683838825170387457176c1ffaea9f5 Mon Sep 17 00:00:00 2001
From: Paul Moore <paul@paul-moore.com>
Date: Tue, 6 Mar 2018 17:27:44 -0500
Subject: net: don't unnecessarily load kernel modules in dev_ioctl()

Starting with v4.16-rc1 we've been seeing a higher than usual number
of requests for the kernel to load networking modules, even on events
which shouldn't trigger a module load (e.g. ioctl(TCGETS)).  Stephen
Smalley suggested the problem may lie in commit 44c02a2c3dc5
("dev_ioctl(): move copyin/copyout to callers") which moves changes
the network dev_ioctl() function to always call dev_load(),
regardless of the requested ioctl.

This patch moves the dev_load() calls back into the individual ioctls
while preserving the rest of the original patch.

Reported-by: Dominick Grift <dac.override@gmail.com>
Suggested-by: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev_ioctl.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 0ab1af04296c..a04e1e88bf3a 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -402,8 +402,6 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
 	if (colon)
 		*colon = 0;
 
-	dev_load(net, ifr->ifr_name);
-
 	/*
 	 *	See which interface the caller is talking about.
 	 */
@@ -423,6 +421,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
 	case SIOCGIFMAP:
 	case SIOCGIFINDEX:
 	case SIOCGIFTXQLEN:
+		dev_load(net, ifr->ifr_name);
 		rcu_read_lock();
 		ret = dev_ifsioc_locked(net, ifr, cmd);
 		rcu_read_unlock();
@@ -431,6 +430,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
 		return ret;
 
 	case SIOCETHTOOL:
+		dev_load(net, ifr->ifr_name);
 		rtnl_lock();
 		ret = dev_ethtool(net, ifr);
 		rtnl_unlock();
@@ -447,6 +447,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
 	case SIOCGMIIPHY:
 	case SIOCGMIIREG:
 	case SIOCSIFNAME:
+		dev_load(net, ifr->ifr_name);
 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 			return -EPERM;
 		rtnl_lock();
@@ -494,6 +495,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
 		/* fall through */
 	case SIOCBONDSLAVEINFOQUERY:
 	case SIOCBONDINFOQUERY:
+		dev_load(net, ifr->ifr_name);
 		rtnl_lock();
 		ret = dev_ifsioc(net, ifr, cmd);
 		rtnl_unlock();
@@ -518,6 +520,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
 		    cmd == SIOCGHWTSTAMP ||
 		    (cmd >= SIOCDEVPRIVATE &&
 		     cmd <= SIOCDEVPRIVATE + 15)) {
+			dev_load(net, ifr->ifr_name);
 			rtnl_lock();
 			ret = dev_ifsioc(net, ifr, cmd);
 			rtnl_unlock();
-- 
cgit v1.2.3-59-g8ed1b


From 9f62c15f28b0d1d746734666d88a79f08ba1e43e Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Date: Thu, 8 Mar 2018 17:00:02 +0100
Subject: ipv6: fix access to non-linear packet in
 ndisc_fill_redirect_hdr_option()

Fix the following slab-out-of-bounds kasan report in
ndisc_fill_redirect_hdr_option when the incoming ipv6 packet is not
linear and the accessed data are not in the linear data region of orig_skb.

[ 1503.122508] ==================================================================
[ 1503.122832] BUG: KASAN: slab-out-of-bounds in ndisc_send_redirect+0x94e/0x990
[ 1503.123036] Read of size 1184 at addr ffff8800298ab6b0 by task netperf/1932

[ 1503.123220] CPU: 0 PID: 1932 Comm: netperf Not tainted 4.16.0-rc2+ #124
[ 1503.123347] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.10.2-2.fc27 04/01/2014
[ 1503.123527] Call Trace:
[ 1503.123579]  <IRQ>
[ 1503.123638]  print_address_description+0x6e/0x280
[ 1503.123849]  kasan_report+0x233/0x350
[ 1503.123946]  memcpy+0x1f/0x50
[ 1503.124037]  ndisc_send_redirect+0x94e/0x990
[ 1503.125150]  ip6_forward+0x1242/0x13b0
[...]
[ 1503.153890] Allocated by task 1932:
[ 1503.153982]  kasan_kmalloc+0x9f/0xd0
[ 1503.154074]  __kmalloc_track_caller+0xb5/0x160
[ 1503.154198]  __kmalloc_reserve.isra.41+0x24/0x70
[ 1503.154324]  __alloc_skb+0x130/0x3e0
[ 1503.154415]  sctp_packet_transmit+0x21a/0x1810
[ 1503.154533]  sctp_outq_flush+0xc14/0x1db0
[ 1503.154624]  sctp_do_sm+0x34e/0x2740
[ 1503.154715]  sctp_primitive_SEND+0x57/0x70
[ 1503.154807]  sctp_sendmsg+0xaa6/0x1b10
[ 1503.154897]  sock_sendmsg+0x68/0x80
[ 1503.154987]  ___sys_sendmsg+0x431/0x4b0
[ 1503.155078]  __sys_sendmsg+0xa4/0x130
[ 1503.155168]  do_syscall_64+0x171/0x3f0
[ 1503.155259]  entry_SYSCALL_64_after_hwframe+0x42/0xb7

[ 1503.155436] Freed by task 1932:
[ 1503.155527]  __kasan_slab_free+0x134/0x180
[ 1503.155618]  kfree+0xbc/0x180
[ 1503.155709]  skb_release_data+0x27f/0x2c0
[ 1503.155800]  consume_skb+0x94/0xe0
[ 1503.155889]  sctp_chunk_put+0x1aa/0x1f0
[ 1503.155979]  sctp_inq_pop+0x2f8/0x6e0
[ 1503.156070]  sctp_assoc_bh_rcv+0x6a/0x230
[ 1503.156164]  sctp_inq_push+0x117/0x150
[ 1503.156255]  sctp_backlog_rcv+0xdf/0x4a0
[ 1503.156346]  __release_sock+0x142/0x250
[ 1503.156436]  release_sock+0x80/0x180
[ 1503.156526]  sctp_sendmsg+0xbb0/0x1b10
[ 1503.156617]  sock_sendmsg+0x68/0x80
[ 1503.156708]  ___sys_sendmsg+0x431/0x4b0
[ 1503.156799]  __sys_sendmsg+0xa4/0x130
[ 1503.156889]  do_syscall_64+0x171/0x3f0
[ 1503.156980]  entry_SYSCALL_64_after_hwframe+0x42/0xb7

[ 1503.157158] The buggy address belongs to the object at ffff8800298ab600
                which belongs to the cache kmalloc-1024 of size 1024
[ 1503.157444] The buggy address is located 176 bytes inside of
                1024-byte region [ffff8800298ab600, ffff8800298aba00)
[ 1503.157702] The buggy address belongs to the page:
[ 1503.157820] page:ffffea0000a62a00 count:1 mapcount:0 mapping:0000000000000000 index:0x0 compound_mapcount: 0
[ 1503.158053] flags: 0x4000000000008100(slab|head)
[ 1503.158171] raw: 4000000000008100 0000000000000000 0000000000000000 00000001800e000e
[ 1503.158350] raw: dead000000000100 dead000000000200 ffff880036002600 0000000000000000
[ 1503.158523] page dumped because: kasan: bad access detected

[ 1503.158698] Memory state around the buggy address:
[ 1503.158816]  ffff8800298ab900: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[ 1503.158988]  ffff8800298ab980: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[ 1503.159165] >ffff8800298aba00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[ 1503.159338]                    ^
[ 1503.159436]  ffff8800298aba80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[ 1503.159610]  ffff8800298abb00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[ 1503.159785] ==================================================================
[ 1503.159964] Disabling lock debugging due to kernel taint

The test scenario to trigger the issue consists of 4 devices:
- H0: data sender, connected to LAN0
- H1: data receiver, connected to LAN1
- GW0 and GW1: routers between LAN0 and LAN1. Both of them have an
  ethernet connection on LAN0 and LAN1
On H{0,1} set GW0 as default gateway while on GW0 set GW1 as next hop for
data from LAN0 to LAN1.
Moreover create an ip6ip6 tunnel between H0 and H1 and send 3 concurrent
data streams (TCP/UDP/SCTP) from H0 to H1 through ip6ip6 tunnel (send
buffer size is set to 16K). While data streams are active flush the route
cache on HA multiple times.
I have not been able to identify a given commit that introduced the issue
since, using the reproducer described above, the kasan report has been
triggered from 4.14 and I have not gone back further.

Reported-by: Jianlin Shi <jishi@redhat.com>
Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ndisc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index f61a5b613b52..ba5e04c6ae17 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1554,7 +1554,8 @@ static void ndisc_fill_redirect_hdr_option(struct sk_buff *skb,
 	*(opt++) = (rd_len >> 3);
 	opt += 6;
 
-	memcpy(opt, ipv6_hdr(orig_skb), rd_len - 8);
+	skb_copy_bits(orig_skb, skb_network_offset(orig_skb), opt,
+		      rd_len - 8);
 }
 
 void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
-- 
cgit v1.2.3-59-g8ed1b


From ca0edb131bdf1e6beaeb2b8289fd6b374b74147d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 5 Mar 2018 08:51:03 -0800
Subject: ieee802154: 6lowpan: fix possible NULL deref in lowpan_device_event()

A tun device type can trivially be set to arbitrary value using
TUNSETLINK ioctl().

Therefore, lowpan_device_event() must really check that ieee802154_ptr
is not NULL.

Fixes: 2c88b5283f60d ("ieee802154: 6lowpan: remove check on null")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Alexander Aring <alex.aring@gmail.com>
Cc: Stefan Schmidt <stefan@osg.samsung.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Acked-by: Stefan Schmidt <stefan@osg.samsung.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ieee802154/6lowpan/core.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index 974765b7d92a..e9f0489e4229 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -206,9 +206,13 @@ static inline void lowpan_netlink_fini(void)
 static int lowpan_device_event(struct notifier_block *unused,
 			       unsigned long event, void *ptr)
 {
-	struct net_device *wdev = netdev_notifier_info_to_dev(ptr);
+	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
+	struct wpan_dev *wpan_dev;
 
-	if (wdev->type != ARPHRD_IEEE802154)
+	if (ndev->type != ARPHRD_IEEE802154)
+		return NOTIFY_DONE;
+	wpan_dev = ndev->ieee802154_ptr;
+	if (!wpan_dev)
 		return NOTIFY_DONE;
 
 	switch (event) {
@@ -217,8 +221,8 @@ static int lowpan_device_event(struct notifier_block *unused,
 		 * also delete possible lowpan interfaces which belongs
 		 * to the wpan interface.
 		 */
-		if (wdev->ieee802154_ptr->lowpan_dev)
-			lowpan_dellink(wdev->ieee802154_ptr->lowpan_dev, NULL);
+		if (wpan_dev->lowpan_dev)
+			lowpan_dellink(wpan_dev->lowpan_dev, NULL);
 		break;
 	default:
 		return NOTIFY_DONE;
-- 
cgit v1.2.3-59-g8ed1b


From 1dd27cde30e85774c77349c804229431616d594a Mon Sep 17 00:00:00 2001
From: Daniel Axtens <dja@axtens.net>
Date: Fri, 9 Mar 2018 14:06:09 +1100
Subject: net: use skb_is_gso_sctp() instead of open-coding

As well as the basic conversion, I noticed that a lot of the
SCTP code checks gso_type without first checking skb_is_gso()
so I have added that where appropriate.

Also, document the helper.

Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: Daniel Axtens <dja@axtens.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/segmentation-offloads.txt | 5 ++++-
 net/core/skbuff.c                                  | 2 +-
 net/sched/act_csum.c                               | 2 +-
 net/sctp/input.c                                   | 8 ++++----
 net/sctp/inqueue.c                                 | 2 +-
 net/sctp/offload.c                                 | 2 +-
 6 files changed, 12 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/Documentation/networking/segmentation-offloads.txt b/Documentation/networking/segmentation-offloads.txt
index fc0c949e7f9c..aca542ec125c 100644
--- a/Documentation/networking/segmentation-offloads.txt
+++ b/Documentation/networking/segmentation-offloads.txt
@@ -155,7 +155,10 @@ Therefore, any code in the core networking stack must be aware of the
 possibility that gso_size will be GSO_BY_FRAGS and handle that case
 appropriately.
 
-There are a couple of helpers to make this easier:
+There are some helpers to make this easier:
+
+ - skb_is_gso(skb) && skb_is_gso_sctp(skb) is the best way to see if
+   an skb is an SCTP GSO skb.
 
  - For size checks, the skb_gso_validate_*_len family of helpers correctly
    considers GSO_BY_FRAGS.
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 0bb0d8877954..baf990528943 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4904,7 +4904,7 @@ static unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
 			thlen += inner_tcp_hdrlen(skb);
 	} else if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
 		thlen = tcp_hdrlen(skb);
-	} else if (unlikely(shinfo->gso_type & SKB_GSO_SCTP)) {
+	} else if (unlikely(skb_is_gso_sctp(skb))) {
 		thlen = sizeof(struct sctphdr);
 	}
 	/* UFO sets gso_size to the size of the fragmentation
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index b7ba9b06b147..24b2e8e681cf 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -350,7 +350,7 @@ static int tcf_csum_sctp(struct sk_buff *skb, unsigned int ihl,
 {
 	struct sctphdr *sctph;
 
-	if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_SCTP)
+	if (skb_is_gso(skb) && skb_is_gso_sctp(skb))
 		return 1;
 
 	sctph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*sctph));
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 0247cc432e02..b381d78548ac 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -106,6 +106,7 @@ int sctp_rcv(struct sk_buff *skb)
 	int family;
 	struct sctp_af *af;
 	struct net *net = dev_net(skb->dev);
+	bool is_gso = skb_is_gso(skb) && skb_is_gso_sctp(skb);
 
 	if (skb->pkt_type != PACKET_HOST)
 		goto discard_it;
@@ -123,8 +124,7 @@ int sctp_rcv(struct sk_buff *skb)
 	 * it's better to just linearize it otherwise crc computing
 	 * takes longer.
 	 */
-	if ((!(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) &&
-	     skb_linearize(skb)) ||
+	if ((!is_gso && skb_linearize(skb)) ||
 	    !pskb_may_pull(skb, sizeof(struct sctphdr)))
 		goto discard_it;
 
@@ -135,7 +135,7 @@ int sctp_rcv(struct sk_buff *skb)
 	if (skb_csum_unnecessary(skb))
 		__skb_decr_checksum_unnecessary(skb);
 	else if (!sctp_checksum_disable &&
-		 !(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) &&
+		 !is_gso &&
 		 sctp_rcv_checksum(net, skb) < 0)
 		goto discard_it;
 	skb->csum_valid = 1;
@@ -1218,7 +1218,7 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net,
 	 * issue as packets hitting this are mostly INIT or INIT-ACK and
 	 * those cannot be on GSO-style anyway.
 	 */
-	if ((skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP)
+	if (skb_is_gso(skb) && skb_is_gso_sctp(skb))
 		return NULL;
 
 	ch = (struct sctp_chunkhdr *)skb->data;
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index 48392552ee7c..23ebc5318edc 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -170,7 +170,7 @@ next_chunk:
 
 		chunk = list_entry(entry, struct sctp_chunk, list);
 
-		if ((skb_shinfo(chunk->skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP) {
+		if (skb_is_gso(chunk->skb) && skb_is_gso_sctp(chunk->skb)) {
 			/* GSO-marked skbs but without frags, handle
 			 * them normally
 			 */
diff --git a/net/sctp/offload.c b/net/sctp/offload.c
index 35bc7106d182..123e9f2dc226 100644
--- a/net/sctp/offload.c
+++ b/net/sctp/offload.c
@@ -45,7 +45,7 @@ static struct sk_buff *sctp_gso_segment(struct sk_buff *skb,
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
 	struct sctphdr *sh;
 
-	if (!(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP))
+	if (!skb_is_gso_sctp(skb))
 		goto out;
 
 	sh = sctp_hdr(skb);
-- 
cgit v1.2.3-59-g8ed1b


From 3b04caab81649a9e8d5375b919b6653d791951df Mon Sep 17 00:00:00 2001
From: William Tu <u9012063@gmail.com>
Date: Fri, 9 Mar 2018 07:34:40 -0800
Subject: ip6gre: add erspan v2 to tunnel lookup

The patch adds the erspan v2 proto in ip6gre_tunnel_lookup
so the erspan v2 tunnel can be found correctly.

Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_gre.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 3c353125546d..83c2fffd40be 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -126,7 +126,8 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
 	struct ip6_tnl *t, *cand = NULL;
 	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
 	int dev_type = (gre_proto == htons(ETH_P_TEB) ||
-			gre_proto == htons(ETH_P_ERSPAN)) ?
+			gre_proto == htons(ETH_P_ERSPAN) ||
+			gre_proto == htons(ETH_P_ERSPAN2)) ?
 		       ARPHRD_ETHER : ARPHRD_IP6GRE;
 	int score, cand_score = 4;
 
-- 
cgit v1.2.3-59-g8ed1b


From d6aa71197ffcb68850bfebfc3fc160abe41df53b Mon Sep 17 00:00:00 2001
From: William Tu <u9012063@gmail.com>
Date: Fri, 9 Mar 2018 07:34:41 -0800
Subject: ip6erspan: improve error handling for erspan version number.

When users fill in incorrect erspan version number through
the struct erspan_metadata uapi, current code skips pushing
the erspan header but continue pushing the gre header, which
is incorrect.  The patch fixes it by returning error.

Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_gre.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 83c2fffd40be..a299f5424e16 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -945,6 +945,8 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
 					       md->u.md2.dir,
 					       get_hwid(&md->u.md2),
 					       truncate, false);
+		} else {
+			goto tx_err;
 		}
 	} else {
 		switch (skb->protocol) {
-- 
cgit v1.2.3-59-g8ed1b


From e41c7c68ea771683cae5a7f81c268f38d7912ecb Mon Sep 17 00:00:00 2001
From: William Tu <u9012063@gmail.com>
Date: Fri, 9 Mar 2018 07:34:42 -0800
Subject: ip6erspan: make sure enough headroom at xmit.

The patch adds skb_cow_header() to ensure enough headroom
at ip6erspan_tunnel_xmit before pushing the erspan header
to the skb.

Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_gre.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index a299f5424e16..1bbd0930063e 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -903,6 +903,9 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
 		truncate = true;
 	}
 
+	if (skb_cow_head(skb, dev->needed_headroom))
+		goto tx_err;
+
 	t->parms.o_flags &= ~TUNNEL_KEY;
 	IPCB(skb)->flags = 0;
 
-- 
cgit v1.2.3-59-g8ed1b


From 932909d9b28d27e807ff8eecb68c7748f6701628 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 8 Mar 2018 12:54:19 +0100
Subject: netfilter: ebtables: fix erroneous reject of last rule

The last rule in the blob has next_entry offset that is same as total size.
This made "ebtables32 -A OUTPUT -d de:ad:be:ef:01:02" fail on 64 bit kernel.

Fixes: b71812168571fa ("netfilter: ebtables: CONFIG_COMPAT: don't trust userland offsets")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/bridge/netfilter/ebtables.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 254ef9f49567..a94d23b0a9af 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -2119,8 +2119,12 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base,
 	 * offsets are relative to beginning of struct ebt_entry (i.e., 0).
 	 */
 	for (i = 0; i < 4 ; ++i) {
-		if (offsets[i] >= *total)
+		if (offsets[i] > *total)
 			return -EINVAL;
+
+		if (i < 3 && offsets[i] == *total)
+			return -EINVAL;
+
 		if (i == 0)
 			continue;
 		if (offsets[i-1] > offsets[i])
-- 
cgit v1.2.3-59-g8ed1b


From b1d0a5d0cba4597c0394997b2d5fced3e3841b4e Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sat, 10 Mar 2018 01:15:45 +0100
Subject: netfilter: x_tables: add and use xt_check_proc_name

recent and hashlimit both create /proc files, but only check that
name is 0 terminated.

This can trigger WARN() from procfs when name is "" or "/".
Add helper for this and then use it for both.

Cc: Eric Dumazet <eric.dumazet@gmail.com>
Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
Reported-by: <syzbot+0502b00edac2a0680b61@syzkaller.appspotmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/x_tables.h |  2 ++
 net/netfilter/x_tables.c           | 30 ++++++++++++++++++++++++++++++
 net/netfilter/xt_hashlimit.c       | 16 ++++++++++------
 net/netfilter/xt_recent.c          |  6 +++---
 4 files changed, 45 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 1313b35c3ab7..14529511c4b8 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -285,6 +285,8 @@ unsigned int *xt_alloc_entry_offsets(unsigned int size);
 bool xt_find_jump_offset(const unsigned int *offsets,
 			 unsigned int target, unsigned int size);
 
+int xt_check_proc_name(const char *name, unsigned int size);
+
 int xt_check_match(struct xt_mtchk_param *, unsigned int size, u_int8_t proto,
 		   bool inv_proto);
 int xt_check_target(struct xt_tgchk_param *, unsigned int size, u_int8_t proto,
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index fa1655aff8d3..4aa01c90e9d1 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -423,6 +423,36 @@ textify_hooks(char *buf, size_t size, unsigned int mask, uint8_t nfproto)
 	return buf;
 }
 
+/**
+ * xt_check_proc_name - check that name is suitable for /proc file creation
+ *
+ * @name: file name candidate
+ * @size: length of buffer
+ *
+ * some x_tables modules wish to create a file in /proc.
+ * This function makes sure that the name is suitable for this
+ * purpose, it checks that name is NUL terminated and isn't a 'special'
+ * name, like "..".
+ *
+ * returns negative number on error or 0 if name is useable.
+ */
+int xt_check_proc_name(const char *name, unsigned int size)
+{
+	if (name[0] == '\0')
+		return -EINVAL;
+
+	if (strnlen(name, size) == size)
+		return -ENAMETOOLONG;
+
+	if (strcmp(name, ".") == 0 ||
+	    strcmp(name, "..") == 0 ||
+	    strchr(name, '/'))
+		return -EINVAL;
+
+	return 0;
+}
+EXPORT_SYMBOL(xt_check_proc_name);
+
 int xt_check_match(struct xt_mtchk_param *par,
 		   unsigned int size, u_int8_t proto, bool inv_proto)
 {
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 66f5aca62a08..3360f13dc208 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -917,8 +917,9 @@ static int hashlimit_mt_check_v1(const struct xt_mtchk_param *par)
 	struct hashlimit_cfg3 cfg = {};
 	int ret;
 
-	if (info->name[sizeof(info->name) - 1] != '\0')
-		return -EINVAL;
+	ret = xt_check_proc_name(info->name, sizeof(info->name));
+	if (ret)
+		return ret;
 
 	ret = cfg_copy(&cfg, (void *)&info->cfg, 1);
 
@@ -935,8 +936,9 @@ static int hashlimit_mt_check_v2(const struct xt_mtchk_param *par)
 	struct hashlimit_cfg3 cfg = {};
 	int ret;
 
-	if (info->name[sizeof(info->name) - 1] != '\0')
-		return -EINVAL;
+	ret = xt_check_proc_name(info->name, sizeof(info->name));
+	if (ret)
+		return ret;
 
 	ret = cfg_copy(&cfg, (void *)&info->cfg, 2);
 
@@ -950,9 +952,11 @@ static int hashlimit_mt_check_v2(const struct xt_mtchk_param *par)
 static int hashlimit_mt_check(const struct xt_mtchk_param *par)
 {
 	struct xt_hashlimit_mtinfo3 *info = par->matchinfo;
+	int ret;
 
-	if (info->name[sizeof(info->name) - 1] != '\0')
-		return -EINVAL;
+	ret = xt_check_proc_name(info->name, sizeof(info->name));
+	if (ret)
+		return ret;
 
 	return hashlimit_mt_check_common(par, &info->hinfo, &info->cfg,
 					 info->name, 3);
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 6d232d18faff..81ee1d6543b2 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -361,9 +361,9 @@ static int recent_mt_check(const struct xt_mtchk_param *par,
 				    info->hit_count, XT_RECENT_MAX_NSTAMPS - 1);
 		return -EINVAL;
 	}
-	if (info->name[0] == '\0' ||
-	    strnlen(info->name, XT_RECENT_NAME_LEN) == XT_RECENT_NAME_LEN)
-		return -EINVAL;
+	ret = xt_check_proc_name(info->name, sizeof(info->name));
+	if (ret)
+		return ret;
 
 	if (ip_pkt_list_tot && info->hit_count < ip_pkt_list_tot)
 		nstamp_mask = roundup_pow_of_two(ip_pkt_list_tot) - 1;
-- 
cgit v1.2.3-59-g8ed1b


From c8d70a700a5b486bfa8e5a7d33d805389f6e59f9 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 9 Mar 2018 14:27:31 +0100
Subject: netfilter: bridge: ebt_among: add more missing match size checks

ebt_among is special, it has a dynamic match size and is exempt
from the central size checks.

commit c4585a2823edf ("bridge: ebt_among: add missing match size checks")
added validation for pool size, but missed fact that the macros
ebt_among_wh_src/dst can already return out-of-bound result because
they do not check value of wh_src/dst_ofs (an offset) vs. the size
of the match that userspace gave to us.

v2:
check that offset has correct alignment.
Paolo Abeni points out that we should also check that src/dst
wormhash arrays do not overlap, and src + length lines up with
start of dst (or vice versa).
v3: compact wormhash_sizes_valid() part

NB: Fixes tag is intentionally wrong, this bug exists from day
one when match was added for 2.6 kernel. Tag is there so stable
maintainers will notice this one too.

Tested with same rules from the earlier patch.

Fixes: c4585a2823edf ("bridge: ebt_among: add missing match size checks")
Reported-by: <syzbot+bdabab6f1983a03fc009@syzkaller.appspotmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/bridge/netfilter/ebt_among.c | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

(limited to 'net')

diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c
index c5afb4232ecb..620e54f08296 100644
--- a/net/bridge/netfilter/ebt_among.c
+++ b/net/bridge/netfilter/ebt_among.c
@@ -177,6 +177,28 @@ static bool poolsize_invalid(const struct ebt_mac_wormhash *w)
 	return w && w->poolsize >= (INT_MAX / sizeof(struct ebt_mac_wormhash_tuple));
 }
 
+static bool wormhash_offset_invalid(int off, unsigned int len)
+{
+	if (off == 0) /* not present */
+		return false;
+
+	if (off < (int)sizeof(struct ebt_among_info) ||
+	    off % __alignof__(struct ebt_mac_wormhash))
+		return true;
+
+	off += sizeof(struct ebt_mac_wormhash);
+
+	return off > len;
+}
+
+static bool wormhash_sizes_valid(const struct ebt_mac_wormhash *wh, int a, int b)
+{
+	if (a == 0)
+		a = sizeof(struct ebt_among_info);
+
+	return ebt_mac_wormhash_size(wh) + a == b;
+}
+
 static int ebt_among_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct ebt_among_info *info = par->matchinfo;
@@ -189,6 +211,10 @@ static int ebt_among_mt_check(const struct xt_mtchk_param *par)
 	if (expected_length > em->match_size)
 		return -EINVAL;
 
+	if (wormhash_offset_invalid(info->wh_dst_ofs, em->match_size) ||
+	    wormhash_offset_invalid(info->wh_src_ofs, em->match_size))
+		return -EINVAL;
+
 	wh_dst = ebt_among_wh_dst(info);
 	if (poolsize_invalid(wh_dst))
 		return -EINVAL;
@@ -201,6 +227,14 @@ static int ebt_among_mt_check(const struct xt_mtchk_param *par)
 	if (poolsize_invalid(wh_src))
 		return -EINVAL;
 
+	if (info->wh_src_ofs < info->wh_dst_ofs) {
+		if (!wormhash_sizes_valid(wh_src, info->wh_src_ofs, info->wh_dst_ofs))
+			return -EINVAL;
+	} else {
+		if (!wormhash_sizes_valid(wh_dst, info->wh_dst_ofs, info->wh_src_ofs))
+			return -EINVAL;
+	}
+
 	expected_length += ebt_mac_wormhash_size(wh_src);
 
 	if (em->match_size != EBT_ALIGN(expected_length)) {
-- 
cgit v1.2.3-59-g8ed1b


From c04a3f730021c304c7cc4bc30ee57ee70ad98d57 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 9 Mar 2018 18:56:26 +0100
Subject: netfilter: nf_tables: release flowtable hooks

Otherwise we leak this array.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 558593e6a0a3..c4acc7340eb1 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -5423,6 +5423,7 @@ err:
 static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable)
 {
 	cancel_delayed_work_sync(&flowtable->data.gc_work);
+	kfree(flowtable->ops);
 	kfree(flowtable->name);
 	flowtable->data.type->free(&flowtable->data);
 	rhashtable_destroy(&flowtable->data.rhashtable);
-- 
cgit v1.2.3-59-g8ed1b


From ddc502dfed600bff0b61d899f70d95b76223fdfc Mon Sep 17 00:00:00 2001
From: zhangliping <zhangliping02@baidu.com>
Date: Fri, 9 Mar 2018 10:08:50 +0800
Subject: openvswitch: meter: fix the incorrect calculation of max delta_t

Max delat_t should be the full_bucket/rate instead of the full_bucket.
Also report EINVAL if the rate is zero.

Fixes: 96fbc13d7e77 ("openvswitch: Add meter infrastructure")
Cc: Andy Zhou <azhou@ovn.org>
Signed-off-by: zhangliping <zhangliping02@baidu.com>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/meter.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index 04b94281a30b..b891a91577f8 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -242,14 +242,20 @@ static struct dp_meter *dp_meter_create(struct nlattr **a)
 
 		band->type = nla_get_u32(attr[OVS_BAND_ATTR_TYPE]);
 		band->rate = nla_get_u32(attr[OVS_BAND_ATTR_RATE]);
+		if (band->rate == 0) {
+			err = -EINVAL;
+			goto exit_free_meter;
+		}
+
 		band->burst_size = nla_get_u32(attr[OVS_BAND_ATTR_BURST]);
 		/* Figure out max delta_t that is enough to fill any bucket.
 		 * Keep max_delta_t size to the bucket units:
 		 * pkts => 1/1000 packets, kilobits => bits.
+		 *
+		 * Start with a full bucket.
 		 */
-		band_max_delta_t = (band->burst_size + band->rate) * 1000;
-		/* Start with a full bucket. */
-		band->bucket = band_max_delta_t;
+		band->bucket = (band->burst_size + band->rate) * 1000;
+		band_max_delta_t = band->bucket / band->rate;
 		if (band_max_delta_t > meter->max_delta_t)
 			meter->max_delta_t = band_max_delta_t;
 		band++;
-- 
cgit v1.2.3-59-g8ed1b


From bf2ae2e4bf9360e07c0cdfa166bcdc0afd92f4ce Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sat, 10 Mar 2018 18:57:50 +0800
Subject: sock_diag: request _diag module only when the family or proto has
 been registered

Now when using 'ss' in iproute, kernel would try to load all _diag
modules, which also causes corresponding family and proto modules
to be loaded as well due to module dependencies.

Like after running 'ss', sctp, dccp, af_packet (if it works as a module)
would be loaded.

For example:

  $ lsmod|grep sctp
  $ ss
  $ lsmod|grep sctp
  sctp_diag              16384  0
  sctp                  323584  5 sctp_diag
  inet_diag              24576  4 raw_diag,tcp_diag,sctp_diag,udp_diag
  libcrc32c              16384  3 nf_conntrack,nf_nat,sctp

As these family and proto modules are loaded unintentionally, it
could cause some problems, like:

- Some debug tools use 'ss' to collect the socket info, which loads all
  those diag and family and protocol modules. It's noisy for identifying
  issues.

- Users usually expect to drop sctp init packet silently when they
  have no sense of sctp protocol instead of sending abort back.

- It wastes resources (especially with multiple netns), and SCTP module
  can't be unloaded once it's loaded.

...

In short, it's really inappropriate to have these family and proto
modules loaded unexpectedly when just doing debugging with inet_diag.

This patch is to introduce sock_load_diag_module() where it loads
the _diag module only when it's corresponding family or proto has
been already registered.

Note that we can't just load _diag module without the family or
proto loaded, as some symbols used in _diag module are from the
family or proto module.

v1->v2:
  - move inet proto check to inet_diag to avoid a compiling err.
v2->v3:
  - define sock_load_diag_module in sock.c and export one symbol
    only.
  - improve the changelog.

Reported-by: Sabrina Dubroca <sd@queasysnail.net>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Acked-by: Phil Sutter <phil@nwl.cc>
Acked-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/net.h  |  1 +
 include/net/sock.h   |  1 +
 net/core/sock.c      | 21 +++++++++++++++++++++
 net/core/sock_diag.c | 12 ++++--------
 net/ipv4/inet_diag.c |  3 +--
 net/socket.c         |  5 +++++
 6 files changed, 33 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/include/linux/net.h b/include/linux/net.h
index 91216b16feb7..2a0391eea05c 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -222,6 +222,7 @@ enum {
 int sock_wake_async(struct socket_wq *sk_wq, int how, int band);
 int sock_register(const struct net_proto_family *fam);
 void sock_unregister(int family);
+bool sock_is_registered(int family);
 int __sock_create(struct net *net, int family, int type, int proto,
 		  struct socket **res, int kern);
 int sock_create(int family, int type, int proto, struct socket **res);
diff --git a/include/net/sock.h b/include/net/sock.h
index 169c92afcafa..ae23f3b389ca 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1137,6 +1137,7 @@ struct proto {
 
 int proto_register(struct proto *prot, int alloc_slab);
 void proto_unregister(struct proto *prot);
+int sock_load_diag_module(int family, int protocol);
 
 #ifdef SOCK_REFCNT_DEBUG
 static inline void sk_refcnt_debug_inc(struct sock *sk)
diff --git a/net/core/sock.c b/net/core/sock.c
index c501499a04fe..85b0b64e7f9d 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -3261,6 +3261,27 @@ void proto_unregister(struct proto *prot)
 }
 EXPORT_SYMBOL(proto_unregister);
 
+int sock_load_diag_module(int family, int protocol)
+{
+	if (!protocol) {
+		if (!sock_is_registered(family))
+			return -ENOENT;
+
+		return request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
+				      NETLINK_SOCK_DIAG, family);
+	}
+
+#ifdef CONFIG_INET
+	if (family == AF_INET &&
+	    !rcu_access_pointer(inet_protos[protocol]))
+		return -ENOENT;
+#endif
+
+	return request_module("net-pf-%d-proto-%d-type-%d-%d", PF_NETLINK,
+			      NETLINK_SOCK_DIAG, family, protocol);
+}
+EXPORT_SYMBOL(sock_load_diag_module);
+
 #ifdef CONFIG_PROC_FS
 static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
 	__acquires(proto_list_mutex)
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 146b50e30659..c37b5be7c5e4 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -220,8 +220,7 @@ static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh)
 		return -EINVAL;
 
 	if (sock_diag_handlers[req->sdiag_family] == NULL)
-		request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
-				NETLINK_SOCK_DIAG, req->sdiag_family);
+		sock_load_diag_module(req->sdiag_family, 0);
 
 	mutex_lock(&sock_diag_table_mutex);
 	hndl = sock_diag_handlers[req->sdiag_family];
@@ -247,8 +246,7 @@ static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
 	case TCPDIAG_GETSOCK:
 	case DCCPDIAG_GETSOCK:
 		if (inet_rcv_compat == NULL)
-			request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
-					NETLINK_SOCK_DIAG, AF_INET);
+			sock_load_diag_module(AF_INET, 0);
 
 		mutex_lock(&sock_diag_table_mutex);
 		if (inet_rcv_compat != NULL)
@@ -281,14 +279,12 @@ static int sock_diag_bind(struct net *net, int group)
 	case SKNLGRP_INET_TCP_DESTROY:
 	case SKNLGRP_INET_UDP_DESTROY:
 		if (!sock_diag_handlers[AF_INET])
-			request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
-				       NETLINK_SOCK_DIAG, AF_INET);
+			sock_load_diag_module(AF_INET, 0);
 		break;
 	case SKNLGRP_INET6_TCP_DESTROY:
 	case SKNLGRP_INET6_UDP_DESTROY:
 		if (!sock_diag_handlers[AF_INET6])
-			request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
-				       NETLINK_SOCK_DIAG, AF_INET6);
+			sock_load_diag_module(AF_INET6, 0);
 		break;
 	}
 	return 0;
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index a383f299ce24..4e5bc4b2f14e 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -53,8 +53,7 @@ static DEFINE_MUTEX(inet_diag_table_mutex);
 static const struct inet_diag_handler *inet_diag_lock_handler(int proto)
 {
 	if (!inet_diag_table[proto])
-		request_module("net-pf-%d-proto-%d-type-%d-%d", PF_NETLINK,
-			       NETLINK_SOCK_DIAG, AF_INET, proto);
+		sock_load_diag_module(AF_INET, proto);
 
 	mutex_lock(&inet_diag_table_mutex);
 	if (!inet_diag_table[proto])
diff --git a/net/socket.c b/net/socket.c
index a93c99b518ca..08847c3b8c39 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2587,6 +2587,11 @@ void sock_unregister(int family)
 }
 EXPORT_SYMBOL(sock_unregister);
 
+bool sock_is_registered(int family)
+{
+	return family < NPROTO && rcu_access_pointer(net_families[family]);
+}
+
 static int __init sock_init(void)
 {
 	int err;
-- 
cgit v1.2.3-59-g8ed1b


From 2f987a76a97773beafbc615b9c4d8fe79129a7f4 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Mon, 12 Mar 2018 14:54:23 +0100
Subject: net: ipv6: keep sk status consistent after datagram connect failure

On unsuccesful ip6_datagram_connect(), if the failure is caused by
ip6_datagram_dst_update(), the sk peer information are cleared, but
the sk->sk_state is preserved.

If the socket was already in an established status, the overall sk
status is inconsistent and fouls later checks in datagram code.

Fix this saving the old peer information and restoring them in
case of failure. This also aligns ipv6 datagram connect() behavior
with ipv4.

v1 -> v2:
 - added missing Fixes tag

Fixes: 85cb73ff9b74 ("net: ipv6: reset daddr and dport in sk if connect() fails")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/datagram.c | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index fbf08ce3f5ab..8a9ac2d0f5d3 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -146,10 +146,12 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr,
 	struct sockaddr_in6	*usin = (struct sockaddr_in6 *) uaddr;
 	struct inet_sock	*inet = inet_sk(sk);
 	struct ipv6_pinfo	*np = inet6_sk(sk);
-	struct in6_addr		*daddr;
+	struct in6_addr		*daddr, old_daddr;
+	__be32			fl6_flowlabel = 0;
+	__be32			old_fl6_flowlabel;
+	__be32			old_dport;
 	int			addr_type;
 	int			err;
-	__be32			fl6_flowlabel = 0;
 
 	if (usin->sin6_family == AF_INET) {
 		if (__ipv6_only_sock(sk))
@@ -238,9 +240,13 @@ ipv4_connected:
 		}
 	}
 
+	/* save the current peer information before updating it */
+	old_daddr = sk->sk_v6_daddr;
+	old_fl6_flowlabel = np->flow_label;
+	old_dport = inet->inet_dport;
+
 	sk->sk_v6_daddr = *daddr;
 	np->flow_label = fl6_flowlabel;
-
 	inet->inet_dport = usin->sin6_port;
 
 	/*
@@ -250,11 +256,12 @@ ipv4_connected:
 
 	err = ip6_datagram_dst_update(sk, true);
 	if (err) {
-		/* Reset daddr and dport so that udp_v6_early_demux()
-		 * fails to find this socket
+		/* Restore the socket peer info, to keep it consistent with
+		 * the old socket state
 		 */
-		memset(&sk->sk_v6_daddr, 0, sizeof(sk->sk_v6_daddr));
-		inet->inet_dport = 0;
+		sk->sk_v6_daddr = old_daddr;
+		np->flow_label = old_fl6_flowlabel;
+		inet->inet_dport = old_dport;
 		goto out;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From b954f94023dcc61388c8384f0f14eb8e42c863c5 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Mon, 12 Mar 2018 14:54:24 +0100
Subject: l2tp: fix races with ipv4-mapped ipv6 addresses

The l2tp_tunnel_create() function checks for v4mapped ipv6
sockets and cache that flag, so that l2tp core code can
reusing it at xmit time.

If the socket is provided by the userspace, the connection
status of the tunnel sockets can change between the tunnel
creation and the xmit call, so that syzbot is able to
trigger the following splat:

BUG: KASAN: use-after-free in ip6_dst_idev include/net/ip6_fib.h:192
[inline]
BUG: KASAN: use-after-free in ip6_xmit+0x1f76/0x2260
net/ipv6/ip6_output.c:264
Read of size 8 at addr ffff8801bd949318 by task syz-executor4/23448

CPU: 0 PID: 23448 Comm: syz-executor4 Not tainted 4.16.0-rc4+ #65
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
Google 01/01/2011
Call Trace:
  __dump_stack lib/dump_stack.c:17 [inline]
  dump_stack+0x194/0x24d lib/dump_stack.c:53
  print_address_description+0x73/0x250 mm/kasan/report.c:256
  kasan_report_error mm/kasan/report.c:354 [inline]
  kasan_report+0x23c/0x360 mm/kasan/report.c:412
  __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:433
  ip6_dst_idev include/net/ip6_fib.h:192 [inline]
  ip6_xmit+0x1f76/0x2260 net/ipv6/ip6_output.c:264
  inet6_csk_xmit+0x2fc/0x580 net/ipv6/inet6_connection_sock.c:139
  l2tp_xmit_core net/l2tp/l2tp_core.c:1053 [inline]
  l2tp_xmit_skb+0x105f/0x1410 net/l2tp/l2tp_core.c:1148
  pppol2tp_sendmsg+0x470/0x670 net/l2tp/l2tp_ppp.c:341
  sock_sendmsg_nosec net/socket.c:630 [inline]
  sock_sendmsg+0xca/0x110 net/socket.c:640
  ___sys_sendmsg+0x767/0x8b0 net/socket.c:2046
  __sys_sendmsg+0xe5/0x210 net/socket.c:2080
  SYSC_sendmsg net/socket.c:2091 [inline]
  SyS_sendmsg+0x2d/0x50 net/socket.c:2087
  do_syscall_64+0x281/0x940 arch/x86/entry/common.c:287
  entry_SYSCALL_64_after_hwframe+0x42/0xb7
RIP: 0033:0x453e69
RSP: 002b:00007f819593cc68 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
RAX: ffffffffffffffda RBX: 00007f819593d6d4 RCX: 0000000000453e69
RDX: 0000000000000081 RSI: 000000002037ffc8 RDI: 0000000000000004
RBP: 000000000072bea0 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff
R13: 00000000000004c3 R14: 00000000006f72e8 R15: 0000000000000000

This change addresses the issues:
* explicitly checking for TCP_ESTABLISHED for user space provided sockets
* dropping the v4mapped flag usage - it can become outdated - and
  explicitly invoking ipv6_addr_v4mapped() instead

The issue is apparently there since ancient times.

v1 -> v2: (many thanks to Guillaume)
 - with csum issue introduced in v1
 - replace pr_err with pr_debug
 - fix build issue with IPV6 disabled
 - move l2tp_sk_is_v4mapped in l2tp_core.c

v2 -> v3:
 - don't update inet_daddr for v4mapped address, unneeded
 - drop rendundant check at creation time

Reported-and-tested-by: syzbot+92fa328176eb07e4ac1a@syzkaller.appspotmail.com
Fixes: 3557baabf280 ("[L2TP]: PPP over L2TP driver core")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_core.c | 38 ++++++++++++++++++--------------------
 net/l2tp/l2tp_core.h |  3 ---
 2 files changed, 18 insertions(+), 23 deletions(-)

(limited to 'net')

diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index e22512e32827..14b67dfacc4b 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -111,6 +111,13 @@ struct l2tp_net {
 	spinlock_t l2tp_session_hlist_lock;
 };
 
+#if IS_ENABLED(CONFIG_IPV6)
+static bool l2tp_sk_is_v6(struct sock *sk)
+{
+	return sk->sk_family == PF_INET6 &&
+	       !ipv6_addr_v4mapped(&sk->sk_v6_daddr);
+}
+#endif
 
 static inline struct l2tp_tunnel *l2tp_tunnel(struct sock *sk)
 {
@@ -1049,7 +1056,7 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb,
 	/* Queue the packet to IP for output */
 	skb->ignore_df = 1;
 #if IS_ENABLED(CONFIG_IPV6)
-	if (tunnel->sock->sk_family == PF_INET6 && !tunnel->v4mapped)
+	if (l2tp_sk_is_v6(tunnel->sock))
 		error = inet6_csk_xmit(tunnel->sock, skb, NULL);
 	else
 #endif
@@ -1112,6 +1119,15 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
 		goto out_unlock;
 	}
 
+	/* The user-space may change the connection status for the user-space
+	 * provided socket at run time: we must check it under the socket lock
+	 */
+	if (tunnel->fd >= 0 && sk->sk_state != TCP_ESTABLISHED) {
+		kfree_skb(skb);
+		ret = NET_XMIT_DROP;
+		goto out_unlock;
+	}
+
 	/* Get routing info from the tunnel socket */
 	skb_dst_drop(skb);
 	skb_dst_set(skb, dst_clone(__sk_dst_check(sk, 0)));
@@ -1131,7 +1147,7 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
 
 		/* Calculate UDP checksum if configured to do so */
 #if IS_ENABLED(CONFIG_IPV6)
-		if (sk->sk_family == PF_INET6 && !tunnel->v4mapped)
+		if (l2tp_sk_is_v6(sk))
 			udp6_set_csum(udp_get_no_check6_tx(sk),
 				      skb, &inet6_sk(sk)->saddr,
 				      &sk->sk_v6_daddr, udp_len);
@@ -1511,24 +1527,6 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
 	if (cfg != NULL)
 		tunnel->debug = cfg->debug;
 
-#if IS_ENABLED(CONFIG_IPV6)
-	if (sk->sk_family == PF_INET6) {
-		struct ipv6_pinfo *np = inet6_sk(sk);
-
-		if (ipv6_addr_v4mapped(&np->saddr) &&
-		    ipv6_addr_v4mapped(&sk->sk_v6_daddr)) {
-			struct inet_sock *inet = inet_sk(sk);
-
-			tunnel->v4mapped = true;
-			inet->inet_saddr = np->saddr.s6_addr32[3];
-			inet->inet_rcv_saddr = sk->sk_v6_rcv_saddr.s6_addr32[3];
-			inet->inet_daddr = sk->sk_v6_daddr.s6_addr32[3];
-		} else {
-			tunnel->v4mapped = false;
-		}
-	}
-#endif
-
 	/* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
 	tunnel->encap = encap;
 	if (encap == L2TP_ENCAPTYPE_UDP) {
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index a1aa9550f04e..2718d0b284d0 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -188,9 +188,6 @@ struct l2tp_tunnel {
 	struct sock		*sock;		/* Parent socket */
 	int			fd;		/* Parent fd, if tunnel socket
 						 * was created by userspace */
-#if IS_ENABLED(CONFIG_IPV6)
-	bool			v4mapped;
-#endif
 
 	struct work_struct	del_work;
 
-- 
cgit v1.2.3-59-g8ed1b


From 5a9f8df68ee6927f21dd3f2c75c16feb8b53a9e8 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Mon, 12 Mar 2018 16:00:40 -0700
Subject: net: dsa: Fix dsa_is_user_port() test inversion

During the conversion to dsa_is_user_port(), a condition ended up being
reversed, which would prevent the creation of any user port when using
the legacy binding and/or platform data, fix that.

Fixes: 4a5b85ffe2a0 ("net: dsa: use dsa_is_user_port everywhere")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/legacy.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c
index cb54b81d0bd9..42a7b85b84e1 100644
--- a/net/dsa/legacy.c
+++ b/net/dsa/legacy.c
@@ -194,7 +194,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds,
 		ds->ports[i].dn = cd->port_dn[i];
 		ds->ports[i].cpu_dp = dst->cpu_dp;
 
-		if (dsa_is_user_port(ds, i))
+		if (!dsa_is_user_port(ds, i))
 			continue;
 
 		ret = dsa_slave_create(&ds->ports[i]);
-- 
cgit v1.2.3-59-g8ed1b


From 0dcd7876029b58770f769cbb7b484e88e4a305e5 Mon Sep 17 00:00:00 2001
From: Greg Hackmann <ghackmann@google.com>
Date: Wed, 7 Mar 2018 14:42:53 -0800
Subject: net: xfrm: use preempt-safe this_cpu_read() in ipcomp_alloc_tfms()

f7c83bcbfaf5 ("net: xfrm: use __this_cpu_read per-cpu helper") added a
__this_cpu_read() call inside ipcomp_alloc_tfms().

At the time, __this_cpu_read() required the caller to either not care
about races or to handle preemption/interrupt issues.  3.15 tightened
the rules around some per-cpu operations, and now __this_cpu_read()
should never be used in a preemptible context.  On 3.15 and later, we
need to use this_cpu_read() instead.

syzkaller reported this leading to the following kernel BUG while
fuzzing sendmsg:

BUG: using __this_cpu_read() in preemptible [00000000] code: repro/3101
caller is ipcomp_init_state+0x185/0x990
CPU: 3 PID: 3101 Comm: repro Not tainted 4.16.0-rc4-00123-g86f84779d8e9 #154
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014
Call Trace:
 dump_stack+0xb9/0x115
 check_preemption_disabled+0x1cb/0x1f0
 ipcomp_init_state+0x185/0x990
 ? __xfrm_init_state+0x876/0xc20
 ? lock_downgrade+0x5e0/0x5e0
 ipcomp4_init_state+0xaa/0x7c0
 __xfrm_init_state+0x3eb/0xc20
 xfrm_init_state+0x19/0x60
 pfkey_add+0x20df/0x36f0
 ? pfkey_broadcast+0x3dd/0x600
 ? pfkey_sock_destruct+0x340/0x340
 ? pfkey_seq_stop+0x80/0x80
 ? __skb_clone+0x236/0x750
 ? kmem_cache_alloc+0x1f6/0x260
 ? pfkey_sock_destruct+0x340/0x340
 ? pfkey_process+0x62a/0x6f0
 pfkey_process+0x62a/0x6f0
 ? pfkey_send_new_mapping+0x11c0/0x11c0
 ? mutex_lock_io_nested+0x1390/0x1390
 pfkey_sendmsg+0x383/0x750
 ? dump_sp+0x430/0x430
 sock_sendmsg+0xc0/0x100
 ___sys_sendmsg+0x6c8/0x8b0
 ? copy_msghdr_from_user+0x3b0/0x3b0
 ? pagevec_lru_move_fn+0x144/0x1f0
 ? find_held_lock+0x32/0x1c0
 ? do_huge_pmd_anonymous_page+0xc43/0x11e0
 ? lock_downgrade+0x5e0/0x5e0
 ? get_kernel_page+0xb0/0xb0
 ? _raw_spin_unlock+0x29/0x40
 ? do_huge_pmd_anonymous_page+0x400/0x11e0
 ? __handle_mm_fault+0x553/0x2460
 ? __fget_light+0x163/0x1f0
 ? __sys_sendmsg+0xc7/0x170
 __sys_sendmsg+0xc7/0x170
 ? SyS_shutdown+0x1a0/0x1a0
 ? __do_page_fault+0x5a0/0xca0
 ? lock_downgrade+0x5e0/0x5e0
 SyS_sendmsg+0x27/0x40
 ? __sys_sendmsg+0x170/0x170
 do_syscall_64+0x19f/0x640
 entry_SYSCALL_64_after_hwframe+0x42/0xb7
RIP: 0033:0x7f0ee73dfb79
RSP: 002b:00007ffe14fc15a8 EFLAGS: 00000207 ORIG_RAX: 000000000000002e
RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f0ee73dfb79
RDX: 0000000000000000 RSI: 00000000208befc8 RDI: 0000000000000004
RBP: 00007ffe14fc15b0 R08: 00007ffe14fc15c0 R09: 00007ffe14fc15c0
R10: 0000000000000000 R11: 0000000000000207 R12: 0000000000400440
R13: 00007ffe14fc16b0 R14: 0000000000000000 R15: 0000000000000000

Signed-off-by: Greg Hackmann <ghackmann@google.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_ipcomp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c
index ccfdc7115a83..a00ec715aa46 100644
--- a/net/xfrm/xfrm_ipcomp.c
+++ b/net/xfrm/xfrm_ipcomp.c
@@ -283,7 +283,7 @@ static struct crypto_comp * __percpu *ipcomp_alloc_tfms(const char *alg_name)
 		struct crypto_comp *tfm;
 
 		/* This can be any valid CPU ID so we don't need locking. */
-		tfm = __this_cpu_read(*pos->tfms);
+		tfm = this_cpu_read(*pos->tfms);
 
 		if (!strcmp(crypto_comp_name(tfm), alg_name)) {
 			pos->users++;
-- 
cgit v1.2.3-59-g8ed1b


From 4dcb31d4649df36297296b819437709f5407059c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 14 Mar 2018 09:04:16 -0700
Subject: net: use skb_to_full_sk() in skb_update_prio()

Andrei Vagin reported a KASAN: slab-out-of-bounds error in
skb_update_prio()

Since SYNACK might be attached to a request socket, we need to
get back to the listener socket.
Since this listener is manipulated without locks, add const
qualifiers to sock_cgroup_prioidx() so that the const can also
be used in skb_update_prio()

Also add the const qualifier to sock_cgroup_classid() for consistency.

Fixes: ca6fb0651883 ("tcp: attach SYNACK messages to request sockets instead of listener")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Andrei Vagin <avagin@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/cgroup-defs.h |  4 ++--
 net/core/dev.c              | 22 +++++++++++++++-------
 2 files changed, 17 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 9f242b876fde..f8e76d01a5ad 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -755,13 +755,13 @@ struct sock_cgroup_data {
  * updaters and return part of the previous pointer as the prioidx or
  * classid.  Such races are short-lived and the result isn't critical.
  */
-static inline u16 sock_cgroup_prioidx(struct sock_cgroup_data *skcd)
+static inline u16 sock_cgroup_prioidx(const struct sock_cgroup_data *skcd)
 {
 	/* fallback to 1 which is always the ID of the root cgroup */
 	return (skcd->is_data & 1) ? skcd->prioidx : 1;
 }
 
-static inline u32 sock_cgroup_classid(struct sock_cgroup_data *skcd)
+static inline u32 sock_cgroup_classid(const struct sock_cgroup_data *skcd)
 {
 	/* fallback to 0 which is the unconfigured default classid */
 	return (skcd->is_data & 1) ? skcd->classid : 0;
diff --git a/net/core/dev.c b/net/core/dev.c
index 2cedf520cb28..12be20535714 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3278,15 +3278,23 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 #if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
 static void skb_update_prio(struct sk_buff *skb)
 {
-	struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap);
+	const struct netprio_map *map;
+	const struct sock *sk;
+	unsigned int prioidx;
 
-	if (!skb->priority && skb->sk && map) {
-		unsigned int prioidx =
-			sock_cgroup_prioidx(&skb->sk->sk_cgrp_data);
+	if (skb->priority)
+		return;
+	map = rcu_dereference_bh(skb->dev->priomap);
+	if (!map)
+		return;
+	sk = skb_to_full_sk(skb);
+	if (!sk)
+		return;
 
-		if (prioidx < map->priomap_len)
-			skb->priority = map->priomap[prioidx];
-	}
+	prioidx = sock_cgroup_prioidx(&sk->sk_cgrp_data);
+
+	if (prioidx < map->priomap_len)
+		skb->priority = map->priomap[prioidx];
 }
 #else
 #define skb_update_prio(skb)
-- 
cgit v1.2.3-59-g8ed1b


From d52e5a7e7ca49457dd31fc8b42fb7c0d58a31221 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Wed, 14 Mar 2018 10:21:14 +0100
Subject: ipv4: lock mtu in fnhe when received PMTU < net.ipv4.route.min_pmtu

Prior to the rework of PMTU information storage in commit
2c8cec5c10bc ("ipv4: Cache learned PMTU information in inetpeer."),
when a PMTU event advertising a PMTU smaller than
net.ipv4.route.min_pmtu was received, we would disable setting the DF
flag on packets by locking the MTU metric, and set the PMTU to
net.ipv4.route.min_pmtu.

Since then, we don't disable DF, and set PMTU to
net.ipv4.route.min_pmtu, so the intermediate router that has this link
with a small MTU will have to drop the packets.

This patch reestablishes pre-2.6.39 behavior by splitting
rtable->rt_pmtu into a bitfield with rt_mtu_locked and rt_pmtu.
rt_mtu_locked indicates that we shouldn't set the DF bit on that path,
and is checked in ip_dont_fragment().

One possible workaround is to set net.ipv4.route.min_pmtu to a value low
enough to accommodate the lowest MTU encountered.

Fixes: 2c8cec5c10bc ("ipv4: Cache learned PMTU information in inetpeer.")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h        | 11 +++++++++--
 include/net/ip_fib.h    |  1 +
 include/net/route.h     |  3 ++-
 net/ipv4/route.c        | 26 +++++++++++++++++++-------
 net/ipv4/xfrm4_policy.c |  1 +
 5 files changed, 32 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/include/net/ip.h b/include/net/ip.h
index 746abff9ce51..f49b3a576bec 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -328,6 +328,13 @@ int ip_decrease_ttl(struct iphdr *iph)
 	return --iph->ttl;
 }
 
+static inline int ip_mtu_locked(const struct dst_entry *dst)
+{
+	const struct rtable *rt = (const struct rtable *)dst;
+
+	return rt->rt_mtu_locked || dst_metric_locked(dst, RTAX_MTU);
+}
+
 static inline
 int ip_dont_fragment(const struct sock *sk, const struct dst_entry *dst)
 {
@@ -335,7 +342,7 @@ int ip_dont_fragment(const struct sock *sk, const struct dst_entry *dst)
 
 	return  pmtudisc == IP_PMTUDISC_DO ||
 		(pmtudisc == IP_PMTUDISC_WANT &&
-		 !(dst_metric_locked(dst, RTAX_MTU)));
+		 !ip_mtu_locked(dst));
 }
 
 static inline bool ip_sk_accept_pmtu(const struct sock *sk)
@@ -361,7 +368,7 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
 	struct net *net = dev_net(dst->dev);
 
 	if (net->ipv4.sysctl_ip_fwd_use_pmtu ||
-	    dst_metric_locked(dst, RTAX_MTU) ||
+	    ip_mtu_locked(dst) ||
 	    !forwarding)
 		return dst_mtu(dst);
 
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index f80524396c06..77d0a78cf7d2 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -59,6 +59,7 @@ struct fib_nh_exception {
 	int				fnhe_genid;
 	__be32				fnhe_daddr;
 	u32				fnhe_pmtu;
+	bool				fnhe_mtu_locked;
 	__be32				fnhe_gw;
 	unsigned long			fnhe_expires;
 	struct rtable __rcu		*fnhe_rth_input;
diff --git a/include/net/route.h b/include/net/route.h
index 40b870d58f38..20a92ca9e115 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -63,7 +63,8 @@ struct rtable {
 	__be32			rt_gateway;
 
 	/* Miscellaneous cached information */
-	u32			rt_pmtu;
+	u32			rt_mtu_locked:1,
+				rt_pmtu:31;
 
 	u32			rt_table_id;
 
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index f9dbb8cb66bf..299e247b2032 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -634,6 +634,7 @@ static inline u32 fnhe_hashfun(__be32 daddr)
 static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
 {
 	rt->rt_pmtu = fnhe->fnhe_pmtu;
+	rt->rt_mtu_locked = fnhe->fnhe_mtu_locked;
 	rt->dst.expires = fnhe->fnhe_expires;
 
 	if (fnhe->fnhe_gw) {
@@ -644,7 +645,7 @@ static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnh
 }
 
 static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
-				  u32 pmtu, unsigned long expires)
+				  u32 pmtu, bool lock, unsigned long expires)
 {
 	struct fnhe_hash_bucket *hash;
 	struct fib_nh_exception *fnhe;
@@ -681,8 +682,10 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
 			fnhe->fnhe_genid = genid;
 		if (gw)
 			fnhe->fnhe_gw = gw;
-		if (pmtu)
+		if (pmtu) {
 			fnhe->fnhe_pmtu = pmtu;
+			fnhe->fnhe_mtu_locked = lock;
+		}
 		fnhe->fnhe_expires = max(1UL, expires);
 		/* Update all cached dsts too */
 		rt = rcu_dereference(fnhe->fnhe_rth_input);
@@ -706,6 +709,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
 		fnhe->fnhe_daddr = daddr;
 		fnhe->fnhe_gw = gw;
 		fnhe->fnhe_pmtu = pmtu;
+		fnhe->fnhe_mtu_locked = lock;
 		fnhe->fnhe_expires = expires;
 
 		/* Exception created; mark the cached routes for the nexthop
@@ -787,7 +791,8 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
 				struct fib_nh *nh = &FIB_RES_NH(res);
 
 				update_or_create_fnhe(nh, fl4->daddr, new_gw,
-						0, jiffies + ip_rt_gc_timeout);
+						0, false,
+						jiffies + ip_rt_gc_timeout);
 			}
 			if (kill_route)
 				rt->dst.obsolete = DST_OBSOLETE_KILL;
@@ -1009,15 +1014,18 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
 {
 	struct dst_entry *dst = &rt->dst;
 	struct fib_result res;
+	bool lock = false;
 
-	if (dst_metric_locked(dst, RTAX_MTU))
+	if (ip_mtu_locked(dst))
 		return;
 
 	if (ipv4_mtu(dst) < mtu)
 		return;
 
-	if (mtu < ip_rt_min_pmtu)
+	if (mtu < ip_rt_min_pmtu) {
+		lock = true;
 		mtu = ip_rt_min_pmtu;
+	}
 
 	if (rt->rt_pmtu == mtu &&
 	    time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2))
@@ -1027,7 +1035,7 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
 	if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) {
 		struct fib_nh *nh = &FIB_RES_NH(res);
 
-		update_or_create_fnhe(nh, fl4->daddr, 0, mtu,
+		update_or_create_fnhe(nh, fl4->daddr, 0, mtu, lock,
 				      jiffies + ip_rt_mtu_expires);
 	}
 	rcu_read_unlock();
@@ -1280,7 +1288,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
 
 	mtu = READ_ONCE(dst->dev->mtu);
 
-	if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
+	if (unlikely(ip_mtu_locked(dst))) {
 		if (rt->rt_uses_gateway && mtu > 576)
 			mtu = 576;
 	}
@@ -1521,6 +1529,7 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
 		rt->rt_is_input = 0;
 		rt->rt_iif = 0;
 		rt->rt_pmtu = 0;
+		rt->rt_mtu_locked = 0;
 		rt->rt_gateway = 0;
 		rt->rt_uses_gateway = 0;
 		rt->rt_table_id = 0;
@@ -2546,6 +2555,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
 		rt->rt_is_input = ort->rt_is_input;
 		rt->rt_iif = ort->rt_iif;
 		rt->rt_pmtu = ort->rt_pmtu;
+		rt->rt_mtu_locked = ort->rt_mtu_locked;
 
 		rt->rt_genid = rt_genid_ipv4(net);
 		rt->rt_flags = ort->rt_flags;
@@ -2648,6 +2658,8 @@ static int rt_fill_info(struct net *net,  __be32 dst, __be32 src, u32 table_id,
 	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
 	if (rt->rt_pmtu && expires)
 		metrics[RTAX_MTU - 1] = rt->rt_pmtu;
+	if (rt->rt_mtu_locked && expires)
+		metrics[RTAX_LOCK - 1] |= BIT(RTAX_MTU);
 	if (rtnetlink_put_metrics(skb, metrics) < 0)
 		goto nla_put_failure;
 
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 8d33f7b311f4..fbebda67ac1b 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -100,6 +100,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
 	xdst->u.rt.rt_gateway = rt->rt_gateway;
 	xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway;
 	xdst->u.rt.rt_pmtu = rt->rt_pmtu;
+	xdst->u.rt.rt_mtu_locked = rt->rt_mtu_locked;
 	xdst->u.rt.rt_table_id = rt->rt_table_id;
 	INIT_LIST_HEAD(&xdst->u.rt.rt_uncached);
 	rt_add_uncached_list(&xdst->u.rt);
-- 
cgit v1.2.3-59-g8ed1b


From 3d502067599f0db12e74e6646aee8728efe3e5be Mon Sep 17 00:00:00 2001
From: Ursula Braun <ubraun@linux.vnet.ibm.com>
Date: Tue, 13 Mar 2018 10:41:54 +0100
Subject: net/smc: simplify wait when closing listen socket

Closing of a listen socket wakes up kernel_accept() of
smc_tcp_listen_worker(), and then has to wait till smc_tcp_listen_worker()
gives up the internal clcsock. The wait logic introduced with
commit 127f49705823 ("net/smc: release clcsock from tcp_listen_worker")
might wait longer than necessary. This patch implements the idea to
implement the wait just with flush_work(), and gets rid of the extra
smc_close_wait_listen_clcsock() function.

Fixes: 127f49705823 ("net/smc: release clcsock from tcp_listen_worker")
Reported-by: Hans Wippel <hwippel@linux.vnet.ibm.com>
Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/af_smc.c    |  4 ----
 net/smc/smc_close.c | 25 +++----------------------
 2 files changed, 3 insertions(+), 26 deletions(-)

(limited to 'net')

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 8cc97834d4f6..1e0d780855c3 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -978,10 +978,6 @@ out:
 		lsmc->clcsock = NULL;
 	}
 	release_sock(lsk);
-	/* no more listening, wake up smc_close_wait_listen_clcsock and
-	 * accept
-	 */
-	lsk->sk_state_change(lsk);
 	sock_put(&lsmc->sk); /* sock_hold in smc_listen */
 }
 
diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
index e339c0186dcf..fa41d9881741 100644
--- a/net/smc/smc_close.c
+++ b/net/smc/smc_close.c
@@ -30,27 +30,6 @@ static void smc_close_cleanup_listen(struct sock *parent)
 		smc_close_non_accepted(sk);
 }
 
-static void smc_close_wait_listen_clcsock(struct smc_sock *smc)
-{
-	DEFINE_WAIT_FUNC(wait, woken_wake_function);
-	struct sock *sk = &smc->sk;
-	signed long timeout;
-
-	timeout = SMC_CLOSE_WAIT_LISTEN_CLCSOCK_TIME;
-	add_wait_queue(sk_sleep(sk), &wait);
-	do {
-		release_sock(sk);
-		if (smc->clcsock)
-			timeout = wait_woken(&wait, TASK_UNINTERRUPTIBLE,
-					     timeout);
-		sched_annotate_sleep();
-		lock_sock(sk);
-		if (!smc->clcsock)
-			break;
-	} while (timeout);
-	remove_wait_queue(sk_sleep(sk), &wait);
-}
-
 /* wait for sndbuf data being transmitted */
 static void smc_close_stream_wait(struct smc_sock *smc, long timeout)
 {
@@ -204,9 +183,11 @@ again:
 			rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
 			/* wake up kernel_accept of smc_tcp_listen_worker */
 			smc->clcsock->sk->sk_data_ready(smc->clcsock->sk);
-			smc_close_wait_listen_clcsock(smc);
 		}
 		smc_close_cleanup_listen(sk);
+		release_sock(sk);
+		flush_work(&smc->tcp_listen_work);
+		lock_sock(sk);
 		break;
 	case SMC_ACTIVE:
 		smc_close_stream_wait(smc, timeout);
-- 
cgit v1.2.3-59-g8ed1b


From 51d4740f88affd85d49c04e3c9cd129c0e33bcb9 Mon Sep 17 00:00:00 2001
From: Roman Mashak <mrv@mojatatu.com>
Date: Mon, 12 Mar 2018 16:20:58 -0400
Subject: net sched actions: return explicit error when tunnel_key mode is not
 specified

If set/unset mode of the tunnel_key action is not provided, ->init() still
returns 0, and the caller proceeds with bogus 'struct tc_action *' object,
this results in crash:

% tc actions add action tunnel_key src_ip 1.1.1.1 dst_ip 2.2.2.1 id 7 index 1

[   35.805515] general protection fault: 0000 [#1] SMP PTI
[   35.806161] Modules linked in: act_tunnel_key kvm_intel kvm irqbypass
crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel aes_x86_64
crypto_simd glue_helper cryptd serio_raw
[   35.808233] CPU: 1 PID: 428 Comm: tc Not tainted 4.16.0-rc4+ #286
[   35.808929] RIP: 0010:tcf_action_init+0x90/0x190
[   35.809457] RSP: 0018:ffffb8edc068b9a0 EFLAGS: 00010206
[   35.810053] RAX: 1320c000000a0003 RBX: 0000000000000001 RCX: 0000000000000000
[   35.810866] RDX: 0000000000000070 RSI: 0000000000007965 RDI: ffffb8edc068b910
[   35.811660] RBP: ffffb8edc068b9d0 R08: 0000000000000000 R09: ffffb8edc068b808
[   35.812463] R10: ffffffffc02bf040 R11: 0000000000000040 R12: ffffb8edc068bb38
[   35.813235] R13: 0000000000000000 R14: 0000000000000000 R15: ffffb8edc068b910
[   35.814006] FS:  00007f3d0d8556c0(0000) GS:ffff91d1dbc40000(0000)
knlGS:0000000000000000
[   35.814881] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   35.815540] CR2: 000000000043f720 CR3: 0000000019248001 CR4: 00000000001606a0
[   35.816457] Call Trace:
[   35.817158]  tc_ctl_action+0x11a/0x220
[   35.817795]  rtnetlink_rcv_msg+0x23d/0x2e0
[   35.818457]  ? __slab_alloc+0x1c/0x30
[   35.819079]  ? __kmalloc_node_track_caller+0xb1/0x2b0
[   35.819544]  ? rtnl_calcit.isra.30+0xe0/0xe0
[   35.820231]  netlink_rcv_skb+0xce/0x100
[   35.820744]  netlink_unicast+0x164/0x220
[   35.821500]  netlink_sendmsg+0x293/0x370
[   35.822040]  sock_sendmsg+0x30/0x40
[   35.822508]  ___sys_sendmsg+0x2c5/0x2e0
[   35.823149]  ? pagecache_get_page+0x27/0x220
[   35.823714]  ? filemap_fault+0xa2/0x640
[   35.824423]  ? page_add_file_rmap+0x108/0x200
[   35.825065]  ? alloc_set_pte+0x2aa/0x530
[   35.825585]  ? finish_fault+0x4e/0x70
[   35.826140]  ? __handle_mm_fault+0xbc1/0x10d0
[   35.826723]  ? __sys_sendmsg+0x41/0x70
[   35.827230]  __sys_sendmsg+0x41/0x70
[   35.827710]  do_syscall_64+0x68/0x120
[   35.828195]  entry_SYSCALL_64_after_hwframe+0x3d/0xa2
[   35.828859] RIP: 0033:0x7f3d0ca4da67
[   35.829331] RSP: 002b:00007ffc9f284338 EFLAGS: 00000246 ORIG_RAX:
000000000000002e
[   35.830304] RAX: ffffffffffffffda RBX: 00007ffc9f284460 RCX: 00007f3d0ca4da67
[   35.831247] RDX: 0000000000000000 RSI: 00007ffc9f2843b0 RDI: 0000000000000003
[   35.832167] RBP: 000000005aa6a7a9 R08: 0000000000000001 R09: 0000000000000000
[   35.833075] R10: 00000000000005f1 R11: 0000000000000246 R12: 0000000000000000
[   35.833997] R13: 00007ffc9f2884c0 R14: 0000000000000001 R15: 0000000000674640
[   35.834923] Code: 24 30 bb 01 00 00 00 45 31 f6 eb 5e 8b 50 08 83 c2 07 83 e2
fc 83 c2 70 49 8b 07 48 8b 40 70 48 85 c0 74 10 48 89 14 24 4c 89 ff <ff> d0 48
8b 14 24 48 01 c2 49 01 d6 45 85 ed 74 05 41 83 47 2c
[   35.837442] RIP: tcf_action_init+0x90/0x190 RSP: ffffb8edc068b9a0
[   35.838291] ---[ end trace a095c06ee4b97a26 ]---

Fixes: d0f6dd8a914f ("net/sched: Introduce act_tunnel_key")
Signed-off-by: Roman Mashak <mrv@mojatatu.com>
Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_tunnel_key.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 0e23aac09ad6..fea772e66e62 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -153,6 +153,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 		metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX;
 		break;
 	default:
+		ret = -EINVAL;
 		goto err_out;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 4bbb3e0e8239f9079bf1fe20b3c0cb598714ae61 Mon Sep 17 00:00:00 2001
From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Date: Tue, 13 Mar 2018 14:51:27 +0900
Subject: net: Fix vlan untag for bridge and vlan_dev with reorder_hdr off

When we have a bridge with vlan_filtering on and a vlan device on top of
it, packets would be corrupted in skb_vlan_untag() called from
br_dev_xmit().

The problem sits in skb_reorder_vlan_header() used in skb_vlan_untag(),
which makes use of skb->mac_len. In this function mac_len is meant for
handling rx path with vlan devices with reorder_header disabled, but in
tx path mac_len is typically 0 and cannot be used, which is the problem
in this case.

The current code even does not properly handle rx path (skb_vlan_untag()
called from __netif_receive_skb_core()) with reorder_header off actually.

In rx path single tag case, it works as follows:

- Before skb_reorder_vlan_header()

 mac_header                                data
   v                                        v
   +-------------------+-------------+------+----
   |        ETH        |    VLAN     | ETH  |
   |       ADDRS       | TPID | TCI  | TYPE |
   +-------------------+-------------+------+----
   <-------- mac_len --------->
                       <------------->
                        to be removed

- After skb_reorder_vlan_header()

            mac_header                     data
                 v                          v
                 +-------------------+------+----
                 |        ETH        | ETH  |
                 |       ADDRS       | TYPE |
                 +-------------------+------+----
                 <-------- mac_len --------->

This is ok, but in rx double tag case, it corrupts packets:

- Before skb_reorder_vlan_header()

 mac_header                                              data
   v                                                      v
   +-------------------+-------------+-------------+------+----
   |        ETH        |    VLAN     |    VLAN     | ETH  |
   |       ADDRS       | TPID | TCI  | TPID | TCI  | TYPE |
   +-------------------+-------------+-------------+------+----
   <--------------- mac_len ---------------->
                                     <------------->
                                    should be removed
                       <--------------------------->
                         actually will be removed

- After skb_reorder_vlan_header()

            mac_header                                   data
                 v                                        v
                               +-------------------+------+----
                               |        ETH        | ETH  |
                               |       ADDRS       | TYPE |
                               +-------------------+------+----
                 <--------------- mac_len ---------------->

So, two of vlan tags are both removed while only inner one should be
removed and mac_header (and mac_len) is broken.

skb_vlan_untag() is meant for removing the vlan header at (skb->data - 2),
so use skb->data and skb->mac_header to calculate the right offset.

Reported-by: Brandon Carpenter <brandon.carpenter@cypherpath.com>
Fixes: a6e18ff11170 ("vlan: Fix untag operations of stacked vlans with REORDER_HEADER off")
Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_ether.h | 1 +
 net/core/skbuff.c             | 7 +++++--
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
index 8bbbcb5cd94b..820de5d222d2 100644
--- a/include/uapi/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
@@ -30,6 +30,7 @@
  */
 
 #define ETH_ALEN	6		/* Octets in one ethernet addr	 */
+#define ETH_TLEN	2		/* Octets in ethernet type field */
 #define ETH_HLEN	14		/* Total octets in header.	 */
 #define ETH_ZLEN	60		/* Min. octets in frame sans FCS */
 #define ETH_DATA_LEN	1500		/* Max. octets in payload	 */
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index baf990528943..b103f46ec512 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -5020,13 +5020,16 @@ EXPORT_SYMBOL_GPL(skb_gso_validate_mac_len);
 
 static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb)
 {
+	int mac_len;
+
 	if (skb_cow(skb, skb_headroom(skb)) < 0) {
 		kfree_skb(skb);
 		return NULL;
 	}
 
-	memmove(skb->data - ETH_HLEN, skb->data - skb->mac_len - VLAN_HLEN,
-		2 * ETH_ALEN);
+	mac_len = skb->data - skb_mac_header(skb);
+	memmove(skb_mac_header(skb) + VLAN_HLEN, skb_mac_header(skb),
+		mac_len - VLAN_HLEN - ETH_TLEN);
 	skb->mac_header += VLAN_HLEN;
 	return skb;
 }
-- 
cgit v1.2.3-59-g8ed1b


From cbe7128c4b92e2004984f477fd38dfa81662f02e Mon Sep 17 00:00:00 2001
From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Date: Tue, 13 Mar 2018 14:51:28 +0900
Subject: vlan: Fix out of order vlan headers with reorder header off

With reorder header off, received packets are untagged in skb_vlan_untag()
called from within __netif_receive_skb_core(), and later the tag will be
inserted back in vlan_do_receive().

This caused out of order vlan headers when we create a vlan device on top
of another vlan device, because vlan_do_receive() inserts a tag as the
outermost vlan tag. E.g. the outer tag is first removed in skb_vlan_untag()
and inserted back in vlan_do_receive(), then the inner tag is next removed
and inserted back as the outermost tag.

This patch fixes the behaviour by inserting the inner tag at the right
position.

Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h | 66 ++++++++++++++++++++++++++++++++++++++++---------
 net/8021q/vlan_core.c   |  4 +--
 2 files changed, 57 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 5e6a2d4dc366..c4a1cff9c768 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -300,30 +300,34 @@ static inline bool vlan_hw_offload_capable(netdev_features_t features,
 }
 
 /**
- * __vlan_insert_tag - regular VLAN tag inserting
+ * __vlan_insert_inner_tag - inner VLAN tag inserting
  * @skb: skbuff to tag
  * @vlan_proto: VLAN encapsulation protocol
  * @vlan_tci: VLAN TCI to insert
+ * @mac_len: MAC header length including outer vlan headers
  *
- * Inserts the VLAN tag into @skb as part of the payload
+ * Inserts the VLAN tag into @skb as part of the payload at offset mac_len
  * Returns error if skb_cow_head failes.
  *
  * Does not change skb->protocol so this function can be used during receive.
  */
-static inline int __vlan_insert_tag(struct sk_buff *skb,
-				    __be16 vlan_proto, u16 vlan_tci)
+static inline int __vlan_insert_inner_tag(struct sk_buff *skb,
+					  __be16 vlan_proto, u16 vlan_tci,
+					  unsigned int mac_len)
 {
 	struct vlan_ethhdr *veth;
 
 	if (skb_cow_head(skb, VLAN_HLEN) < 0)
 		return -ENOMEM;
 
-	veth = skb_push(skb, VLAN_HLEN);
+	skb_push(skb, VLAN_HLEN);
 
-	/* Move the mac addresses to the beginning of the new header. */
-	memmove(skb->data, skb->data + VLAN_HLEN, 2 * ETH_ALEN);
+	/* Move the mac header sans proto to the beginning of the new header. */
+	memmove(skb->data, skb->data + VLAN_HLEN, mac_len - ETH_TLEN);
 	skb->mac_header -= VLAN_HLEN;
 
+	veth = (struct vlan_ethhdr *)(skb->data + mac_len - ETH_HLEN);
+
 	/* first, the ethernet type */
 	veth->h_vlan_proto = vlan_proto;
 
@@ -334,12 +338,30 @@ static inline int __vlan_insert_tag(struct sk_buff *skb,
 }
 
 /**
- * vlan_insert_tag - regular VLAN tag inserting
+ * __vlan_insert_tag - regular VLAN tag inserting
  * @skb: skbuff to tag
  * @vlan_proto: VLAN encapsulation protocol
  * @vlan_tci: VLAN TCI to insert
  *
  * Inserts the VLAN tag into @skb as part of the payload
+ * Returns error if skb_cow_head failes.
+ *
+ * Does not change skb->protocol so this function can be used during receive.
+ */
+static inline int __vlan_insert_tag(struct sk_buff *skb,
+				    __be16 vlan_proto, u16 vlan_tci)
+{
+	return __vlan_insert_inner_tag(skb, vlan_proto, vlan_tci, ETH_HLEN);
+}
+
+/**
+ * vlan_insert_inner_tag - inner VLAN tag inserting
+ * @skb: skbuff to tag
+ * @vlan_proto: VLAN encapsulation protocol
+ * @vlan_tci: VLAN TCI to insert
+ * @mac_len: MAC header length including outer vlan headers
+ *
+ * Inserts the VLAN tag into @skb as part of the payload at offset mac_len
  * Returns a VLAN tagged skb. If a new skb is created, @skb is freed.
  *
  * Following the skb_unshare() example, in case of error, the calling function
@@ -347,12 +369,14 @@ static inline int __vlan_insert_tag(struct sk_buff *skb,
  *
  * Does not change skb->protocol so this function can be used during receive.
  */
-static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb,
-					      __be16 vlan_proto, u16 vlan_tci)
+static inline struct sk_buff *vlan_insert_inner_tag(struct sk_buff *skb,
+						    __be16 vlan_proto,
+						    u16 vlan_tci,
+						    unsigned int mac_len)
 {
 	int err;
 
-	err = __vlan_insert_tag(skb, vlan_proto, vlan_tci);
+	err = __vlan_insert_inner_tag(skb, vlan_proto, vlan_tci, mac_len);
 	if (err) {
 		dev_kfree_skb_any(skb);
 		return NULL;
@@ -360,6 +384,26 @@ static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb,
 	return skb;
 }
 
+/**
+ * vlan_insert_tag - regular VLAN tag inserting
+ * @skb: skbuff to tag
+ * @vlan_proto: VLAN encapsulation protocol
+ * @vlan_tci: VLAN TCI to insert
+ *
+ * Inserts the VLAN tag into @skb as part of the payload
+ * Returns a VLAN tagged skb. If a new skb is created, @skb is freed.
+ *
+ * Following the skb_unshare() example, in case of error, the calling function
+ * doesn't have to worry about freeing the original skb.
+ *
+ * Does not change skb->protocol so this function can be used during receive.
+ */
+static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb,
+					      __be16 vlan_proto, u16 vlan_tci)
+{
+	return vlan_insert_inner_tag(skb, vlan_proto, vlan_tci, ETH_HLEN);
+}
+
 /**
  * vlan_insert_tag_set_proto - regular VLAN tag inserting
  * @skb: skbuff to tag
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 64aa9f755e1d..45c9bf5ff3a0 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -48,8 +48,8 @@ bool vlan_do_receive(struct sk_buff **skbp)
 		 * original position later
 		 */
 		skb_push(skb, offset);
-		skb = *skbp = vlan_insert_tag(skb, skb->vlan_proto,
-					      skb->vlan_tci);
+		skb = *skbp = vlan_insert_inner_tag(skb, skb->vlan_proto,
+						    skb->vlan_tci, skb->mac_len);
 		if (!skb)
 			return false;
 		skb_pull(skb, offset + VLAN_HLEN);
-- 
cgit v1.2.3-59-g8ed1b


From 2cc683e88c0c993ac3721d9b702cb0630abe2879 Mon Sep 17 00:00:00 2001
From: Tom Herbert <tom@quantonium.net>
Date: Tue, 13 Mar 2018 12:01:43 -0700
Subject: kcm: lock lower socket in kcm_attach

Need to lock lower socket in order to provide mutual exclusion
with kcm_unattach.

v2: Add Reported-by for syzbot

Fixes: ab7ac4eb9832e32a09f4e804 ("kcm: Kernel Connection Multiplexor module")
Reported-by: syzbot+ea75c0ffcd353d32515f064aaebefc5279e6161e@syzkaller.appspotmail.com
Signed-off-by: Tom Herbert <tom@quantonium.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/kcm/kcmsock.c | 33 +++++++++++++++++++++++----------
 1 file changed, 23 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index f297d53a11aa..34355fd19f27 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1381,24 +1381,32 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
 		.parse_msg = kcm_parse_func_strparser,
 		.read_sock_done = kcm_read_sock_done,
 	};
-	int err;
+	int err = 0;
 
 	csk = csock->sk;
 	if (!csk)
 		return -EINVAL;
 
+	lock_sock(csk);
+
 	/* Only allow TCP sockets to be attached for now */
 	if ((csk->sk_family != AF_INET && csk->sk_family != AF_INET6) ||
-	    csk->sk_protocol != IPPROTO_TCP)
-		return -EOPNOTSUPP;
+	    csk->sk_protocol != IPPROTO_TCP) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
 
 	/* Don't allow listeners or closed sockets */
-	if (csk->sk_state == TCP_LISTEN || csk->sk_state == TCP_CLOSE)
-		return -EOPNOTSUPP;
+	if (csk->sk_state == TCP_LISTEN || csk->sk_state == TCP_CLOSE) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
 
 	psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL);
-	if (!psock)
-		return -ENOMEM;
+	if (!psock) {
+		err = -ENOMEM;
+		goto out;
+	}
 
 	psock->mux = mux;
 	psock->sk = csk;
@@ -1407,7 +1415,7 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
 	err = strp_init(&psock->strp, csk, &cb);
 	if (err) {
 		kmem_cache_free(kcm_psockp, psock);
-		return err;
+		goto out;
 	}
 
 	write_lock_bh(&csk->sk_callback_lock);
@@ -1419,7 +1427,8 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
 		write_unlock_bh(&csk->sk_callback_lock);
 		strp_done(&psock->strp);
 		kmem_cache_free(kcm_psockp, psock);
-		return -EALREADY;
+		err = -EALREADY;
+		goto out;
 	}
 
 	psock->save_data_ready = csk->sk_data_ready;
@@ -1455,7 +1464,10 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
 	/* Schedule RX work in case there are already bytes queued */
 	strp_check_rcv(&psock->strp);
 
-	return 0;
+out:
+	release_sock(csk);
+
+	return err;
 }
 
 static int kcm_attach_ioctl(struct socket *sock, struct kcm_attach *info)
@@ -1507,6 +1519,7 @@ static void kcm_unattach(struct kcm_psock *psock)
 
 	if (WARN_ON(psock->rx_kcm)) {
 		write_unlock_bh(&csk->sk_callback_lock);
+		release_sock(csk);
 		return;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From fa6a91e9b907231d2e38ea5ed89c537b3525df3d Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Tue, 13 Mar 2018 16:50:06 +0100
Subject: net/iucv: Free memory obtained by kzalloc

Free memory by calling put_device(), if afiucv_iucv_init is not
successful.

Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Ursula Braun <ursula.braun@de.ibm.com>
Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/iucv/af_iucv.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 1e8cc7bcbca3..9e2643ab4ccb 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -2433,9 +2433,11 @@ static int afiucv_iucv_init(void)
 	af_iucv_dev->driver = &af_iucv_driver;
 	err = device_register(af_iucv_dev);
 	if (err)
-		goto out_driver;
+		goto out_iucv_dev;
 	return 0;
 
+out_iucv_dev:
+	put_device(af_iucv_dev);
 out_driver:
 	driver_unregister(&af_iucv_driver);
 out_iucv:
-- 
cgit v1.2.3-59-g8ed1b


From 02a2385f37a7c6594c9d89b64c4a1451276f08eb Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Wed, 14 Mar 2018 21:10:23 +0100
Subject: netlink: avoid a double skb free in genlmsg_mcast()

nlmsg_multicast() consumes always the skb, thus the original skb must be
freed only when this function is called with a clone.

Fixes: cb9f7a9a5c96 ("netlink: ensure to loop over all netns in genlmsg_multicast_allns()")
Reported-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/genetlink.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 6f02499ef007..b9ce82c9440f 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -1106,7 +1106,7 @@ static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group,
 	if (!err)
 		delivered = true;
 	else if (err != -ESRCH)
-		goto error;
+		return err;
 	return delivered ? 0 : -ESRCH;
  error:
 	kfree_skb(skb);
-- 
cgit v1.2.3-59-g8ed1b


From 6e5d58fdc9bedd0255a8781b258f10bbdc63e975 Mon Sep 17 00:00:00 2001
From: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Date: Wed, 14 Mar 2018 13:32:09 -0700
Subject: skbuff: Fix not waking applications when errors are enqueued

When errors are enqueued to the error queue via sock_queue_err_skb()
function, it is possible that the waiting application is not notified.

Calling 'sk->sk_data_ready()' would not notify applications that
selected only POLLERR events in poll() (for example).

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Reported-by: Randy E. Witt <randy.e.witt@intel.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index b103f46ec512..1e7acdc30732 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4179,7 +4179,7 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
 
 	skb_queue_tail(&sk->sk_error_queue, skb);
 	if (!sock_flag(sk, SOCK_DEAD))
-		sk->sk_data_ready(sk);
+		sk->sk_error_report(sk);
 	return 0;
 }
 EXPORT_SYMBOL(sock_queue_err_skb);
-- 
cgit v1.2.3-59-g8ed1b


From bc44b78157f621ff2a2618fe287a827bcb094ac4 Mon Sep 17 00:00:00 2001
From: Matthias Schiffer <mschiffer@universe-factory.net>
Date: Fri, 16 Mar 2018 11:29:09 +0100
Subject: batman-adv: update data pointers after skb_cow()

batadv_check_unicast_ttvn() calls skb_cow(), so pointers into the SKB data
must be (re)set after calling it. The ethhdr variable is dropped
altogether.

Fixes: 7cdcf6dddc42 ("batman-adv: add UNICAST_4ADDR packet type")
Signed-off-by: Matthias Schiffer <mschiffer@universe-factory.net>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/routing.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index b6891e8b741c..6a9242658c8d 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -968,14 +968,10 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
 	struct batadv_orig_node *orig_node = NULL, *orig_node_gw = NULL;
 	int check, hdr_size = sizeof(*unicast_packet);
 	enum batadv_subtype subtype;
-	struct ethhdr *ethhdr;
 	int ret = NET_RX_DROP;
 	bool is4addr, is_gw;
 
 	unicast_packet = (struct batadv_unicast_packet *)skb->data;
-	unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data;
-	ethhdr = eth_hdr(skb);
-
 	is4addr = unicast_packet->packet_type == BATADV_UNICAST_4ADDR;
 	/* the caller function should have already pulled 2 bytes */
 	if (is4addr)
@@ -995,12 +991,14 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
 	if (!batadv_check_unicast_ttvn(bat_priv, skb, hdr_size))
 		goto free_skb;
 
+	unicast_packet = (struct batadv_unicast_packet *)skb->data;
+
 	/* packet for me */
 	if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) {
 		/* If this is a unicast packet from another backgone gw,
 		 * drop it.
 		 */
-		orig_addr_gw = ethhdr->h_source;
+		orig_addr_gw = eth_hdr(skb)->h_source;
 		orig_node_gw = batadv_orig_hash_find(bat_priv, orig_addr_gw);
 		if (orig_node_gw) {
 			is_gw = batadv_bla_is_backbone_gw(skb, orig_node_gw,
@@ -1015,6 +1013,8 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
 		}
 
 		if (is4addr) {
+			unicast_4addr_packet =
+				(struct batadv_unicast_4addr_packet *)skb->data;
 			subtype = unicast_4addr_packet->subtype;
 			batadv_dat_inc_counter(bat_priv, subtype);
 
-- 
cgit v1.2.3-59-g8ed1b


From 6f27d2c2a8c236d296201c19abb8533ec20d212b Mon Sep 17 00:00:00 2001
From: Matthias Schiffer <mschiffer@universe-factory.net>
Date: Fri, 16 Mar 2018 11:29:10 +0100
Subject: batman-adv: fix header size check in batadv_dbg_arp()

Checking for 0 is insufficient: when an SKB without a batadv header, but
with a VLAN header is received, hdr_size will be 4, making the following
code interpret the Ethernet header as a batadv header.

Fixes: be1db4f6615b ("batman-adv: make the Distributed ARP Table vlan aware")
Signed-off-by: Matthias Schiffer <mschiffer@universe-factory.net>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/distributed-arp-table.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 9703c791ffc5..87cd962d28d5 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -393,7 +393,7 @@ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb,
 		   batadv_arp_hw_src(skb, hdr_size), &ip_src,
 		   batadv_arp_hw_dst(skb, hdr_size), &ip_dst);
 
-	if (hdr_size == 0)
+	if (hdr_size < sizeof(struct batadv_unicast_packet))
 		return;
 
 	unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data;
-- 
cgit v1.2.3-59-g8ed1b


From cce6294cc2eaa083482e1d85d8db5845c82a7e14 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 14 Mar 2018 18:53:00 -0700
Subject: net: sched: fix uses after free

syzbot reported one use-after-free in pfifo_fast_enqueue() [1]

Issue here is that we can not reuse skb after a successful skb_array_produce()
since another cpu might have consumed it already.

I believe a similar problem exists in try_bulk_dequeue_skb_slow()
in case we put an skb into qdisc_enqueue_skb_bad_txq() for lockless qdisc.

[1]
BUG: KASAN: use-after-free in qdisc_pkt_len include/net/sch_generic.h:610 [inline]
BUG: KASAN: use-after-free in qdisc_qstats_cpu_backlog_inc include/net/sch_generic.h:712 [inline]
BUG: KASAN: use-after-free in pfifo_fast_enqueue+0x4bc/0x5e0 net/sched/sch_generic.c:639
Read of size 4 at addr ffff8801cede37e8 by task syzkaller717588/5543

CPU: 1 PID: 5543 Comm: syzkaller717588 Not tainted 4.16.0-rc4+ #265
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:17 [inline]
 dump_stack+0x194/0x24d lib/dump_stack.c:53
 print_address_description+0x73/0x250 mm/kasan/report.c:256
 kasan_report_error mm/kasan/report.c:354 [inline]
 kasan_report+0x23c/0x360 mm/kasan/report.c:412
 __asan_report_load4_noabort+0x14/0x20 mm/kasan/report.c:432
 qdisc_pkt_len include/net/sch_generic.h:610 [inline]
 qdisc_qstats_cpu_backlog_inc include/net/sch_generic.h:712 [inline]
 pfifo_fast_enqueue+0x4bc/0x5e0 net/sched/sch_generic.c:639
 __dev_xmit_skb net/core/dev.c:3216 [inline]

Fixes: c5ad119fb6c0 ("net: sched: pfifo_fast use skb_array")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot+ed43b6903ab968b16f54@syzkaller.appspotmail.com
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc:	Cong Wang <xiyou.wangcong@gmail.com>
Cc:	Jiri Pirko <jiri@resnulli.us>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_generic.c | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 190570f21b20..7e3fbe9cc936 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -106,6 +106,14 @@ static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *q,
 
 	__skb_queue_tail(&q->skb_bad_txq, skb);
 
+	if (qdisc_is_percpu_stats(q)) {
+		qdisc_qstats_cpu_backlog_inc(q, skb);
+		qdisc_qstats_cpu_qlen_inc(q);
+	} else {
+		qdisc_qstats_backlog_inc(q, skb);
+		q->q.qlen++;
+	}
+
 	if (lock)
 		spin_unlock(lock);
 }
@@ -196,14 +204,6 @@ static void try_bulk_dequeue_skb_slow(struct Qdisc *q,
 			break;
 		if (unlikely(skb_get_queue_mapping(nskb) != mapping)) {
 			qdisc_enqueue_skb_bad_txq(q, nskb);
-
-			if (qdisc_is_percpu_stats(q)) {
-				qdisc_qstats_cpu_backlog_inc(q, nskb);
-				qdisc_qstats_cpu_qlen_inc(q);
-			} else {
-				qdisc_qstats_backlog_inc(q, nskb);
-				q->q.qlen++;
-			}
 			break;
 		}
 		skb->next = nskb;
@@ -628,6 +628,7 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
 	int band = prio2band[skb->priority & TC_PRIO_MAX];
 	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
 	struct skb_array *q = band2list(priv, band);
+	unsigned int pkt_len = qdisc_pkt_len(skb);
 	int err;
 
 	err = skb_array_produce(q, skb);
@@ -636,7 +637,10 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
 		return qdisc_drop_cpu(skb, qdisc, to_free);
 
 	qdisc_qstats_cpu_qlen_inc(qdisc);
-	qdisc_qstats_cpu_backlog_inc(qdisc, skb);
+	/* Note: skb can not be used after skb_array_produce(),
+	 * so we better not use qdisc_qstats_cpu_backlog_inc()
+	 */
+	this_cpu_add(qdisc->cpu_qstats->backlog, pkt_len);
 	return NET_XMIT_SUCCESS;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 1edf8abe04090c4f41a85e42c66638be1ee69156 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Fri, 16 Mar 2018 00:00:53 +0100
Subject: net/sched: fix NULL dereference in the error path of tcf_vlan_init()

when the following command

 # tc actions replace action vlan pop index 100

is run for the first time, and tcf_vlan_init() fails allocating struct
tcf_vlan_params, tcf_vlan_cleanup() calls kfree_rcu(NULL, ...). This causes
the following error:

 BUG: unable to handle kernel NULL pointer dereference at 0000000000000018
 IP: __call_rcu+0x23/0x2b0
 PGD 80000000760a2067 P4D 80000000760a2067 PUD 742c1067 PMD 0
 Oops: 0002 [#1] SMP PTI
 Modules linked in: act_vlan(E) ip6table_filter ip6_tables iptable_filter binfmt_misc ext4 snd_hda_codec_generic snd_hda_intel mbcache snd_hda_codec jbd2 snd_hda_core crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc snd_hwdep snd_seq snd_seq_device snd_pcm aesni_intel crypto_simd snd_timer glue_helper snd cryptd joydev soundcore virtio_balloon pcspkr i2c_piix4 nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c ata_generic pata_acpi qxl drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm virtio_console virtio_blk virtio_net ata_piix crc32c_intel libata virtio_pci i2c_core virtio_ring serio_raw virtio floppy dm_mirror dm_region_hash dm_log dm_mod [last unloaded: act_vlan]
 CPU: 3 PID: 3119 Comm: tc Tainted: G            E    4.16.0-rc4.act_vlan.orig+ #403
 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
 RIP: 0010:__call_rcu+0x23/0x2b0
 RSP: 0018:ffffaac3005fb798 EFLAGS: 00010246
 RAX: ffffffffc0704080 RBX: ffff97f2b4bbe900 RCX: 00000000ffffffff
 RDX: ffffffffabca5f00 RSI: 0000000000000010 RDI: 0000000000000010
 RBP: 0000000000000010 R08: 0000000000000001 R09: 0000000000000044
 R10: 00000000fd003000 R11: ffff97f2faab5b91 R12: 0000000000000000
 R13: ffffffffabca5f00 R14: ffff97f2fb80202c R15: 00000000fffffff4
 FS:  00007f68f75b4740(0000) GS:ffff97f2ffd80000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 0000000000000018 CR3: 0000000072b52001 CR4: 00000000001606e0
 Call Trace:
  __tcf_idr_release+0x79/0xf0
  tcf_vlan_init+0x168/0x270 [act_vlan]
  tcf_action_init_1+0x2cc/0x430
  tcf_action_init+0xd3/0x1b0
  tc_ctl_action+0x18b/0x240
  rtnetlink_rcv_msg+0x29c/0x310
  ? _cond_resched+0x15/0x30
  ? __kmalloc_node_track_caller+0x1b9/0x270
  ? rtnl_calcit.isra.28+0x100/0x100
  netlink_rcv_skb+0xd2/0x110
  netlink_unicast+0x17c/0x230
  netlink_sendmsg+0x2cd/0x3c0
  sock_sendmsg+0x30/0x40
  ___sys_sendmsg+0x27a/0x290
  ? filemap_map_pages+0x34a/0x3a0
  ? __handle_mm_fault+0xbfd/0xe20
  __sys_sendmsg+0x51/0x90
  do_syscall_64+0x6e/0x1a0
  entry_SYSCALL_64_after_hwframe+0x3d/0xa2
 RIP: 0033:0x7f68f69c5ba0
 RSP: 002b:00007fffd79c1118 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
 RAX: ffffffffffffffda RBX: 00007fffd79c1240 RCX: 00007f68f69c5ba0
 RDX: 0000000000000000 RSI: 00007fffd79c1190 RDI: 0000000000000003
 RBP: 000000005aaa708e R08: 0000000000000002 R09: 0000000000000000
 R10: 00007fffd79c0ba0 R11: 0000000000000246 R12: 0000000000000000
 R13: 00007fffd79c1254 R14: 0000000000000001 R15: 0000000000669f60
 Code: 5d e9 42 da ff ff 66 90 0f 1f 44 00 00 41 57 41 56 41 55 49 89 d5 41 54 55 48 89 fd 53 48 83 ec 08 40 f6 c7 07 0f 85 19 02 00 00 <48> 89 75 08 48 c7 45 00 00 00 00 00 9c 58 0f 1f 44 00 00 49 89
 RIP: __call_rcu+0x23/0x2b0 RSP: ffffaac3005fb798
 CR2: 0000000000000018

fix this in tcf_vlan_cleanup(), ensuring that kfree_rcu(p, ...) is called
only when p is not NULL.

Fixes: 4c5b9d9642c8 ("act_vlan: VLAN action rewrite to use RCU lock/unlock and update")
Acked-by: Jiri Pirko <jiri@mellanox.com>
Acked-by: Manish Kurup <manish.kurup@verizon.com>
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_vlan.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index e1a1b3f3983a..c2914e9a4a6f 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -225,7 +225,8 @@ static void tcf_vlan_cleanup(struct tc_action *a)
 	struct tcf_vlan_params *p;
 
 	p = rcu_dereference_protected(v->vlan_p, 1);
-	kfree_rcu(p, rcu);
+	if (p)
+		kfree_rcu(p, rcu);
 }
 
 static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a,
-- 
cgit v1.2.3-59-g8ed1b


From aab378a7eda21941c4b678b21aea8867b6817f59 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Fri, 16 Mar 2018 00:00:54 +0100
Subject: net/sched: fix NULL dereference in the error path of tcf_csum_init()

when the following command

 # tc action add action csum udp continue index 100

is run for the first time, and tcf_csum_init() fails allocating struct
tcf_csum, tcf_csum_cleanup() calls kfree_rcu(NULL,...). This causes the
following error:

 BUG: unable to handle kernel NULL pointer dereference at 0000000000000010
 IP: __call_rcu+0x23/0x2b0
 PGD 80000000740b4067 P4D 80000000740b4067 PUD 32e7f067 PMD 0
 Oops: 0002 [#1] SMP PTI
 Modules linked in: act_csum(E) act_vlan ip6table_filter ip6_tables iptable_filter binfmt_misc ext4 mbcache jbd2 crct10dif_pclmul crc32_pclmul ghash_clmulni_intel snd_hda_codec_generic pcbc snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_seq snd_seq_device snd_pcm snd_timer aesni_intel crypto_simd glue_helper cryptd snd joydev pcspkr virtio_balloon i2c_piix4 soundcore nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c ata_generic pata_acpi qxl drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm virtio_blk drm virtio_net virtio_console ata_piix crc32c_intel libata virtio_pci serio_raw i2c_core virtio_ring virtio floppy dm_mirror dm_region_hash dm_log dm_mod [last unloaded: act_vlan]
 CPU: 2 PID: 5763 Comm: tc Tainted: G            E    4.16.0-rc4.act_vlan.orig+ #403
 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
 RIP: 0010:__call_rcu+0x23/0x2b0
 RSP: 0018:ffffb275803e77c0 EFLAGS: 00010246
 RAX: ffffffffc057b080 RBX: ffff9674bc6f5240 RCX: 00000000ffffffff
 RDX: ffffffff928a5f00 RSI: 0000000000000008 RDI: 0000000000000008
 RBP: 0000000000000008 R08: 0000000000000001 R09: 0000000000000044
 R10: 0000000000000220 R11: ffff9674b9ab4821 R12: 0000000000000000
 R13: ffffffff928a5f00 R14: 0000000000000000 R15: 0000000000000001
 FS:  00007fa6368d8740(0000) GS:ffff9674bfd00000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 0000000000000010 CR3: 0000000073dec001 CR4: 00000000001606e0
 Call Trace:
  __tcf_idr_release+0x79/0xf0
  tcf_csum_init+0xfb/0x180 [act_csum]
  tcf_action_init_1+0x2cc/0x430
  tcf_action_init+0xd3/0x1b0
  tc_ctl_action+0x18b/0x240
  rtnetlink_rcv_msg+0x29c/0x310
  ? _cond_resched+0x15/0x30
  ? __kmalloc_node_track_caller+0x1b9/0x270
  ? rtnl_calcit.isra.28+0x100/0x100
  netlink_rcv_skb+0xd2/0x110
  netlink_unicast+0x17c/0x230
  netlink_sendmsg+0x2cd/0x3c0
  sock_sendmsg+0x30/0x40
  ___sys_sendmsg+0x27a/0x290
  ? filemap_map_pages+0x34a/0x3a0
  ? __handle_mm_fault+0xbfd/0xe20
  __sys_sendmsg+0x51/0x90
  do_syscall_64+0x6e/0x1a0
  entry_SYSCALL_64_after_hwframe+0x3d/0xa2
 RIP: 0033:0x7fa635ce9ba0
 RSP: 002b:00007ffc185b0fc8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
 RAX: ffffffffffffffda RBX: 00007ffc185b10f0 RCX: 00007fa635ce9ba0
 RDX: 0000000000000000 RSI: 00007ffc185b1040 RDI: 0000000000000003
 RBP: 000000005aaa85e0 R08: 0000000000000002 R09: 0000000000000000
 R10: 00007ffc185b0a20 R11: 0000000000000246 R12: 0000000000000000
 R13: 00007ffc185b1104 R14: 0000000000000001 R15: 0000000000669f60
 Code: 5d e9 42 da ff ff 66 90 0f 1f 44 00 00 41 57 41 56 41 55 49 89 d5 41 54 55 48 89 fd 53 48 83 ec 08 40 f6 c7 07 0f 85 19 02 00 00 <48> 89 75 08 48 c7 45 00 00 00 00 00 9c 58 0f 1f 44 00 00 49 89
 RIP: __call_rcu+0x23/0x2b0 RSP: ffffb275803e77c0
 CR2: 0000000000000010

fix this in tcf_csum_cleanup(), ensuring that kfree_rcu(param, ...) is
called only when param is not NULL.

Fixes: 9c5f69bbd75a ("net/sched: act_csum: don't use spinlock in the fast path")
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_csum.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 24b2e8e681cf..2a5c8fd860cf 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -626,7 +626,8 @@ static void tcf_csum_cleanup(struct tc_action *a)
 	struct tcf_csum_params *params;
 
 	params = rcu_dereference_protected(p->params, 1);
-	kfree_rcu(params, rcu);
+	if (params)
+		kfree_rcu(params, rcu);
 }
 
 static int tcf_csum_walker(struct net *net, struct sk_buff *skb,
-- 
cgit v1.2.3-59-g8ed1b


From abdadd3cfd3e7ea3da61ac774f84777d1f702058 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Fri, 16 Mar 2018 00:00:55 +0100
Subject: net/sched: fix NULL dereference in the error path of
 tunnel_key_init()

when the following command

 # tc action add action tunnel_key unset index 100

is run for the first time, and tunnel_key_init() fails to allocate struct
tcf_tunnel_key_params, tunnel_key_release() dereferences NULL pointers.
This causes the following error:

 BUG: unable to handle kernel NULL pointer dereference at 0000000000000010
 IP: tunnel_key_release+0xd/0x40 [act_tunnel_key]
 PGD 8000000033787067 P4D 8000000033787067 PUD 74646067 PMD 0
 Oops: 0000 [#1] SMP PTI
 Modules linked in: act_tunnel_key(E) act_csum ip6table_filter ip6_tables iptable_filter binfmt_misc ext4 mbcache jbd2 crct10dif_pclmul crc32_pclmul snd_hda_codec_generic ghash_clmulni_intel snd_hda_intel pcbc snd_hda_codec snd_hda_core snd_hwdep snd_seq aesni_intel snd_seq_device crypto_simd glue_helper snd_pcm cryptd joydev snd_timer pcspkr virtio_balloon snd i2c_piix4 soundcore nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c ata_generic pata_acpi qxl drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm virtio_net virtio_blk drm virtio_console crc32c_intel ata_piix serio_raw i2c_core virtio_pci libata virtio_ring virtio floppy dm_mirror dm_region_hash dm_log dm_mod
 CPU: 2 PID: 3101 Comm: tc Tainted: G            E    4.16.0-rc4.act_vlan.orig+ #403
 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
 RIP: 0010:tunnel_key_release+0xd/0x40 [act_tunnel_key]
 RSP: 0018:ffffba46803b7768 EFLAGS: 00010286
 RAX: ffffffffc09010a0 RBX: 0000000000000000 RCX: 0000000000000024
 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff99ee336d7480
 RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000044
 R10: 0000000000000220 R11: ffff99ee79d73131 R12: 0000000000000000
 R13: ffff99ee32d67610 R14: ffff99ee7671dc38 R15: 00000000fffffff4
 FS:  00007febcb2cd740(0000) GS:ffff99ee7fd00000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 0000000000000010 CR3: 000000007c8e4005 CR4: 00000000001606e0
 Call Trace:
  __tcf_idr_release+0x79/0xf0
  tunnel_key_init+0xd9/0x460 [act_tunnel_key]
  tcf_action_init_1+0x2cc/0x430
  tcf_action_init+0xd3/0x1b0
  tc_ctl_action+0x18b/0x240
  rtnetlink_rcv_msg+0x29c/0x310
  ? _cond_resched+0x15/0x30
  ? __kmalloc_node_track_caller+0x1b9/0x270
  ? rtnl_calcit.isra.28+0x100/0x100
  netlink_rcv_skb+0xd2/0x110
  netlink_unicast+0x17c/0x230
  netlink_sendmsg+0x2cd/0x3c0
  sock_sendmsg+0x30/0x40
  ___sys_sendmsg+0x27a/0x290
  __sys_sendmsg+0x51/0x90
  do_syscall_64+0x6e/0x1a0
  entry_SYSCALL_64_after_hwframe+0x3d/0xa2
 RIP: 0033:0x7febca6deba0
 RSP: 002b:00007ffe7b0dd128 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
 RAX: ffffffffffffffda RBX: 00007ffe7b0dd250 RCX: 00007febca6deba0
 RDX: 0000000000000000 RSI: 00007ffe7b0dd1a0 RDI: 0000000000000003
 RBP: 000000005aaa90cb R08: 0000000000000002 R09: 0000000000000000
 R10: 00007ffe7b0dcba0 R11: 0000000000000246 R12: 0000000000000000
 R13: 00007ffe7b0dd264 R14: 0000000000000001 R15: 0000000000669f60
 Code: 44 00 00 8b 0d b5 23 00 00 48 8b 87 48 10 00 00 48 8b 3c c8 e9 a5 e5 d8 c3 0f 1f 44 00 00 0f 1f 44 00 00 53 48 8b 9f b0 00 00 00 <83> 7b 10 01 74 0b 48 89 df 31 f6 5b e9 f2 fa 7f c3 48 8b 7b 18
 RIP: tunnel_key_release+0xd/0x40 [act_tunnel_key] RSP: ffffba46803b7768
 CR2: 0000000000000010

Fix this in tunnel_key_release(), ensuring 'param' is not NULL before
dereferencing it.

Fixes: d0f6dd8a914f ("net/sched: Introduce act_tunnel_key")
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_tunnel_key.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index fea772e66e62..1281ca463727 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -208,11 +208,12 @@ static void tunnel_key_release(struct tc_action *a)
 	struct tcf_tunnel_key_params *params;
 
 	params = rcu_dereference_protected(t->params, 1);
+	if (params) {
+		if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET)
+			dst_release(&params->tcft_enc_metadata->dst);
 
-	if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET)
-		dst_release(&params->tcft_enc_metadata->dst);
-
-	kfree_rcu(params, rcu);
+		kfree_rcu(params, rcu);
+	}
 }
 
 static int tunnel_key_dump_addresses(struct sk_buff *skb,
-- 
cgit v1.2.3-59-g8ed1b


From 1f110e7cae09e6c6a144616480d1a9dd99c5208a Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Fri, 16 Mar 2018 00:00:56 +0100
Subject: net/sched: fix NULL dereference in the error path of
 tcf_sample_init()

when the following command

 # tc action add action sample rate 100 group 100 index 100

is run for the first time, and psample_group_get(100) fails to create a
new group, tcf_sample_cleanup() calls psample_group_put(NULL), thus
causing the following error:

 BUG: unable to handle kernel NULL pointer dereference at 000000000000001c
 IP: psample_group_put+0x15/0x71 [psample]
 PGD 8000000075775067 P4D 8000000075775067 PUD 7453c067 PMD 0
 Oops: 0002 [#1] SMP PTI
 Modules linked in: act_sample(E) psample ip6table_filter ip6_tables iptable_filter binfmt_misc ext4 snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core mbcache jbd2 crct10dif_pclmul snd_hwdep crc32_pclmul snd_seq ghash_clmulni_intel pcbc snd_seq_device snd_pcm aesni_intel crypto_simd snd_timer glue_helper snd cryptd joydev pcspkr i2c_piix4 soundcore virtio_balloon nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c ata_generic pata_acpi qxl drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm virtio_net ata_piix virtio_console virtio_blk libata serio_raw crc32c_intel virtio_pci i2c_core virtio_ring virtio floppy dm_mirror dm_region_hash dm_log dm_mod [last unloaded: act_tunnel_key]
 CPU: 2 PID: 5740 Comm: tc Tainted: G            E    4.16.0-rc4.act_vlan.orig+ #403
 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
 RIP: 0010:psample_group_put+0x15/0x71 [psample]
 RSP: 0018:ffffb8a80032f7d0 EFLAGS: 00010246
 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000024
 RDX: 0000000000000001 RSI: 0000000000000000 RDI: ffffffffc06d93c0
 RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000044
 R10: 00000000bd003000 R11: ffff979fba04aa59 R12: 0000000000000000
 R13: 0000000000000000 R14: 0000000000000000 R15: ffff979fbba3f22c
 FS:  00007f7638112740(0000) GS:ffff979fbfd00000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 000000000000001c CR3: 00000000734ea001 CR4: 00000000001606e0
 Call Trace:
  __tcf_idr_release+0x79/0xf0
  tcf_sample_init+0x125/0x1d0 [act_sample]
  tcf_action_init_1+0x2cc/0x430
  tcf_action_init+0xd3/0x1b0
  tc_ctl_action+0x18b/0x240
  rtnetlink_rcv_msg+0x29c/0x310
  ? _cond_resched+0x15/0x30
  ? __kmalloc_node_track_caller+0x1b9/0x270
  ? rtnl_calcit.isra.28+0x100/0x100
  netlink_rcv_skb+0xd2/0x110
  netlink_unicast+0x17c/0x230
  netlink_sendmsg+0x2cd/0x3c0
  sock_sendmsg+0x30/0x40
  ___sys_sendmsg+0x27a/0x290
  ? filemap_map_pages+0x34a/0x3a0
  ? __handle_mm_fault+0xbfd/0xe20
  __sys_sendmsg+0x51/0x90
  do_syscall_64+0x6e/0x1a0
  entry_SYSCALL_64_after_hwframe+0x3d/0xa2
 RIP: 0033:0x7f7637523ba0
 RSP: 002b:00007fff0473ef58 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
 RAX: ffffffffffffffda RBX: 00007fff0473f080 RCX: 00007f7637523ba0
 RDX: 0000000000000000 RSI: 00007fff0473efd0 RDI: 0000000000000003
 RBP: 000000005aaaac80 R08: 0000000000000002 R09: 0000000000000000
 R10: 00007fff0473e9e0 R11: 0000000000000246 R12: 0000000000000000
 R13: 00007fff0473f094 R14: 0000000000000001 R15: 0000000000669f60
 Code: be 02 00 00 00 48 89 df e8 a9 fe ff ff e9 7c ff ff ff 0f 1f 40 00 0f 1f 44 00 00 53 48 89 fb 48 c7 c7 c0 93 6d c0 e8 db 20 8c ef <83> 6b 1c 01 74 10 48 c7 c7 c0 93 6d c0 ff 14 25 e8 83 83 b0 5b
 RIP: psample_group_put+0x15/0x71 [psample] RSP: ffffb8a80032f7d0
 CR2: 000000000000001c

Fix it in tcf_sample_cleanup(), ensuring that calls to psample_group_put(p)
are done only when p is not NULL.

Fixes: cadb9c9fdbc6 ("net/sched: act_sample: Fix error path in init")
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_sample.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 1ba0df238756..74c5d7e6a0fa 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -103,7 +103,8 @@ static void tcf_sample_cleanup(struct tc_action *a)
 
 	psample_group = rtnl_dereference(s->psample_group);
 	RCU_INIT_POINTER(s->psample_group, NULL);
-	psample_group_put(psample_group);
+	if (psample_group)
+		psample_group_put(psample_group);
 }
 
 static bool tcf_sample_dev_ok_push(struct net_device *dev)
-- 
cgit v1.2.3-59-g8ed1b


From 2d433610176d6569e8b3a28f67bc72235bf69efc Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Fri, 16 Mar 2018 00:00:57 +0100
Subject: net/sched: fix NULL dereference on the error path of
 tcf_skbmod_init()

when the following command

 # tc action replace action skbmod swap mac index 100

is run for the first time, and tcf_skbmod_init() fails to allocate struct
tcf_skbmod_params, tcf_skbmod_cleanup() calls kfree_rcu(NULL), thus
causing the following error:

 BUG: unable to handle kernel NULL pointer dereference at 0000000000000008
 IP: __call_rcu+0x23/0x2b0
 PGD 8000000034057067 P4D 8000000034057067 PUD 74937067 PMD 0
 Oops: 0002 [#1] SMP PTI
 Modules linked in: act_skbmod(E) psample ip6table_filter ip6_tables iptable_filter binfmt_misc ext4 snd_hda_codec_generic snd_hda_intel snd_hda_codec crct10dif_pclmul mbcache jbd2 crc32_pclmul snd_hda_core ghash_clmulni_intel snd_hwdep pcbc snd_seq snd_seq_device snd_pcm aesni_intel snd_timer crypto_simd glue_helper snd cryptd virtio_balloon joydev soundcore pcspkr i2c_piix4 nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c ata_generic pata_acpi qxl drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm virtio_console virtio_net virtio_blk ata_piix libata crc32c_intel virtio_pci serio_raw virtio_ring virtio i2c_core floppy dm_mirror dm_region_hash dm_log dm_mod [last unloaded: act_skbmod]
 CPU: 3 PID: 3144 Comm: tc Tainted: G            E    4.16.0-rc4.act_vlan.orig+ #403
 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
 RIP: 0010:__call_rcu+0x23/0x2b0
 RSP: 0018:ffffbd2e403e7798 EFLAGS: 00010246
 RAX: ffffffffc0872080 RBX: ffff981d34bff780 RCX: 00000000ffffffff
 RDX: ffffffff922a5f00 RSI: 0000000000000000 RDI: 0000000000000000
 RBP: 0000000000000000 R08: 0000000000000001 R09: 000000000000021f
 R10: 000000003d003000 R11: 0000000000aaaaaa R12: 0000000000000000
 R13: ffffffff922a5f00 R14: 0000000000000001 R15: ffff981d3b698c2c
 FS:  00007f3678292740(0000) GS:ffff981d3fd80000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 0000000000000008 CR3: 000000007c57a006 CR4: 00000000001606e0
 Call Trace:
  __tcf_idr_release+0x79/0xf0
  tcf_skbmod_init+0x1d1/0x210 [act_skbmod]
  tcf_action_init_1+0x2cc/0x430
  tcf_action_init+0xd3/0x1b0
  tc_ctl_action+0x18b/0x240
  rtnetlink_rcv_msg+0x29c/0x310
  ? _cond_resched+0x15/0x30
  ? __kmalloc_node_track_caller+0x1b9/0x270
  ? rtnl_calcit.isra.28+0x100/0x100
  netlink_rcv_skb+0xd2/0x110
  netlink_unicast+0x17c/0x230
  netlink_sendmsg+0x2cd/0x3c0
  sock_sendmsg+0x30/0x40
  ___sys_sendmsg+0x27a/0x290
  ? filemap_map_pages+0x34a/0x3a0
  ? __handle_mm_fault+0xbfd/0xe20
  __sys_sendmsg+0x51/0x90
  do_syscall_64+0x6e/0x1a0
  entry_SYSCALL_64_after_hwframe+0x3d/0xa2
 RIP: 0033:0x7f36776a3ba0
 RSP: 002b:00007fff4703b618 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
 RAX: ffffffffffffffda RBX: 00007fff4703b740 RCX: 00007f36776a3ba0
 RDX: 0000000000000000 RSI: 00007fff4703b690 RDI: 0000000000000003
 RBP: 000000005aaaba36 R08: 0000000000000002 R09: 0000000000000000
 R10: 00007fff4703b0a0 R11: 0000000000000246 R12: 0000000000000000
 R13: 00007fff4703b754 R14: 0000000000000001 R15: 0000000000669f60
 Code: 5d e9 42 da ff ff 66 90 0f 1f 44 00 00 41 57 41 56 41 55 49 89 d5 41 54 55 48 89 fd 53 48 83 ec 08 40 f6 c7 07 0f 85 19 02 00 00 <48> 89 75 08 48 c7 45 00 00 00 00 00 9c 58 0f 1f 44 00 00 49 89
 RIP: __call_rcu+0x23/0x2b0 RSP: ffffbd2e403e7798
 CR2: 0000000000000008

Fix it in tcf_skbmod_cleanup(), ensuring that kfree_rcu(p, ...) is called
only when p is not NULL.

Fixes: 86da71b57383 ("net_sched: Introduce skbmod action")
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_skbmod.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index fa975262dbac..d09565d6433e 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -190,7 +190,8 @@ static void tcf_skbmod_cleanup(struct tc_action *a)
 	struct tcf_skbmod_params  *p;
 
 	p = rcu_dereference_protected(d->skbmod_p, 1);
-	kfree_rcu(p, rcu);
+	if (p)
+		kfree_rcu(p, rcu);
 }
 
 static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a,
-- 
cgit v1.2.3-59-g8ed1b


From 48881ed56b395bdf2cff6c102039016223ca4da6 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Sun, 18 Mar 2018 09:48:03 +0100
Subject: batman-adv: Add missing include for EPOLL* constants

Fixes: a9a08845e9ac ("vfs: do bulk POLL* -> EPOLL* replacement")
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/icmp_socket.c | 1 +
 net/batman-adv/log.c         | 1 +
 2 files changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index e91f29c7c638..5daa3d50da17 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -24,6 +24,7 @@
 #include <linux/debugfs.h>
 #include <linux/errno.h>
 #include <linux/etherdevice.h>
+#include <linux/eventpoll.h>
 #include <linux/export.h>
 #include <linux/fcntl.h>
 #include <linux/fs.h>
diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c
index dc9fa37ddd14..cdbe0e5e208b 100644
--- a/net/batman-adv/log.c
+++ b/net/batman-adv/log.c
@@ -22,6 +22,7 @@
 #include <linux/compiler.h>
 #include <linux/debugfs.h>
 #include <linux/errno.h>
+#include <linux/eventpoll.h>
 #include <linux/export.h>
 #include <linux/fcntl.h>
 #include <linux/fs.h>
-- 
cgit v1.2.3-59-g8ed1b


From fc04fdb2c8a894283259f5621d31d75610701091 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Fri, 16 Mar 2018 21:14:32 +0100
Subject: batman-adv: Fix skbuff rcsum on packet reroute

batadv_check_unicast_ttvn may redirect a packet to itself or another
originator. This involves rewriting the ttvn and the destination address in
the batadv unicast header. These field were not yet pulled (with skb rcsum
update) and thus any change to them also requires a change in the receive
checksum.

Reported-by: Matthias Schiffer <mschiffer@universe-factory.net>
Fixes: a73105b8d4c7 ("batman-adv: improved client announcement mechanism")
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/routing.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 6a9242658c8d..e61dc1293bb5 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -759,6 +759,7 @@ free_skb:
 /**
  * batadv_reroute_unicast_packet() - update the unicast header for re-routing
  * @bat_priv: the bat priv with all the soft interface information
+ * @skb: unicast packet to process
  * @unicast_packet: the unicast header to be updated
  * @dst_addr: the payload destination
  * @vid: VLAN identifier
@@ -770,7 +771,7 @@ free_skb:
  * Return: true if the packet header has been updated, false otherwise
  */
 static bool
-batadv_reroute_unicast_packet(struct batadv_priv *bat_priv,
+batadv_reroute_unicast_packet(struct batadv_priv *bat_priv, struct sk_buff *skb,
 			      struct batadv_unicast_packet *unicast_packet,
 			      u8 *dst_addr, unsigned short vid)
 {
@@ -799,8 +800,10 @@ batadv_reroute_unicast_packet(struct batadv_priv *bat_priv,
 	}
 
 	/* update the packet header */
+	skb_postpull_rcsum(skb, unicast_packet, sizeof(*unicast_packet));
 	ether_addr_copy(unicast_packet->dest, orig_addr);
 	unicast_packet->ttvn = orig_ttvn;
+	skb_postpush_rcsum(skb, unicast_packet, sizeof(*unicast_packet));
 
 	ret = true;
 out:
@@ -841,7 +844,7 @@ static bool batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
 	 * the packet to
 	 */
 	if (batadv_tt_local_client_is_roaming(bat_priv, ethhdr->h_dest, vid)) {
-		if (batadv_reroute_unicast_packet(bat_priv, unicast_packet,
+		if (batadv_reroute_unicast_packet(bat_priv, skb, unicast_packet,
 						  ethhdr->h_dest, vid))
 			batadv_dbg_ratelimited(BATADV_DBG_TT,
 					       bat_priv,
@@ -887,7 +890,7 @@ static bool batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
 	 * destination can possibly be updated and forwarded towards the new
 	 * target host
 	 */
-	if (batadv_reroute_unicast_packet(bat_priv, unicast_packet,
+	if (batadv_reroute_unicast_packet(bat_priv, skb, unicast_packet,
 					  ethhdr->h_dest, vid)) {
 		batadv_dbg_ratelimited(BATADV_DBG_TT, bat_priv,
 				       "Rerouting unicast packet to %pM (dst=%pM): TTVN mismatch old_ttvn=%u new_ttvn=%u\n",
@@ -910,12 +913,14 @@ static bool batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
 	if (!primary_if)
 		return false;
 
+	/* update the packet header */
+	skb_postpull_rcsum(skb, unicast_packet, sizeof(*unicast_packet));
 	ether_addr_copy(unicast_packet->dest, primary_if->net_dev->dev_addr);
+	unicast_packet->ttvn = curr_ttvn;
+	skb_postpush_rcsum(skb, unicast_packet, sizeof(*unicast_packet));
 
 	batadv_hardif_put(primary_if);
 
-	unicast_packet->ttvn = curr_ttvn;
-
 	return true;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 7fe4d6dcbcb43fe0282d4213fc52be178bb30e91 Mon Sep 17 00:00:00 2001
From: Arkadi Sharshevsky <arkadis@mellanox.com>
Date: Sun, 18 Mar 2018 17:37:22 +0200
Subject: devlink: Remove redundant free on error path

The current code performs unneeded free. Remove the redundant skb freeing
during the error path.

Fixes: 1555d204e743 ("devlink: Support for pipeline debug (dpipe)")
Signed-off-by: Arkadi Sharshevsky <arkadis@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/devlink.c | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/core/devlink.c b/net/core/devlink.c
index 2f2307d94787..effd4848c2b4 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -1798,7 +1798,7 @@ send_done:
 	if (!nlh) {
 		err = devlink_dpipe_send_and_alloc_skb(&skb, info);
 		if (err)
-			goto err_skb_send_alloc;
+			return err;
 		goto send_done;
 	}
 
@@ -1807,7 +1807,6 @@ send_done:
 nla_put_failure:
 	err = -EMSGSIZE;
 err_table_put:
-err_skb_send_alloc:
 	genlmsg_cancel(skb, hdr);
 	nlmsg_free(skb);
 	return err;
@@ -2073,7 +2072,7 @@ static int devlink_dpipe_entries_fill(struct genl_info *info,
 					     table->counters_enabled,
 					     &dump_ctx);
 	if (err)
-		goto err_entries_dump;
+		return err;
 
 send_done:
 	nlh = nlmsg_put(dump_ctx.skb, info->snd_portid, info->snd_seq,
@@ -2081,16 +2080,10 @@ send_done:
 	if (!nlh) {
 		err = devlink_dpipe_send_and_alloc_skb(&dump_ctx.skb, info);
 		if (err)
-			goto err_skb_send_alloc;
+			return err;
 		goto send_done;
 	}
 	return genlmsg_reply(dump_ctx.skb, info);
-
-err_entries_dump:
-err_skb_send_alloc:
-	genlmsg_cancel(dump_ctx.skb, dump_ctx.hdr);
-	nlmsg_free(dump_ctx.skb);
-	return err;
 }
 
 static int devlink_nl_cmd_dpipe_entries_get(struct sk_buff *skb,
@@ -2229,7 +2222,7 @@ send_done:
 	if (!nlh) {
 		err = devlink_dpipe_send_and_alloc_skb(&skb, info);
 		if (err)
-			goto err_skb_send_alloc;
+			return err;
 		goto send_done;
 	}
 	return genlmsg_reply(skb, info);
@@ -2237,7 +2230,6 @@ send_done:
 nla_put_failure:
 	err = -EMSGSIZE;
 err_table_put:
-err_skb_send_alloc:
 	genlmsg_cancel(skb, hdr);
 	nlmsg_free(skb);
 	return err;
-- 
cgit v1.2.3-59-g8ed1b


From 5f2fb802eee1df0810b47ea251942fe3fd36589a Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Mon, 19 Mar 2018 11:24:58 +0100
Subject: ipv6: old_dport should be a __be16 in __ip6_datagram_connect()

Fixes: 2f987a76a977 ("net: ipv6: keep sk status consistent after datagram connect failure")
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/datagram.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 8a9ac2d0f5d3..a9f7eca0b6a3 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -149,7 +149,7 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr,
 	struct in6_addr		*daddr, old_daddr;
 	__be32			fl6_flowlabel = 0;
 	__be32			old_fl6_flowlabel;
-	__be32			old_dport;
+	__be16			old_dport;
 	int			addr_type;
 	int			err;
 
-- 
cgit v1.2.3-59-g8ed1b


From 7c181f4fcdc62e5dc7a87fd33387d322262c3b52 Mon Sep 17 00:00:00 2001
From: Ben Caradoc-Davies <ben@transient.nz>
Date: Mon, 19 Mar 2018 12:57:44 +1300
Subject: mac80211: add ieee80211_hw flag for QoS NDP support

Commit 7b6ddeaf27ec ("mac80211: use QoS NDP for AP probing") added an
argument qos_ok to ieee80211_nullfunc_get to support QoS NDP. Despite
the claim in the commit log "Change all the drivers to *not* allow
QoS NDP for now, even though it looks like most of them should be OK
with that", this commit enables QoS NDP in response to beacons (see
change to mlme.c:ieee80211_send_nullfunc), causing ath9k_htc to lose
IP connectivity. See:
https://patchwork.kernel.org/patch/10241109/
https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=891060

Introduce a hardware flag to allow such buggy drivers to override the
correct default behaviour of mac80211 of sending QoS NDP packets.

Signed-off-by: Ben Caradoc-Davies <ben@transient.nz>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 4 ++++
 net/mac80211/debugfs.c | 1 +
 net/mac80211/mlme.c    | 3 ++-
 3 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index c96511fa9198..2b581bd93812 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -2063,6 +2063,9 @@ struct ieee80211_txq {
  * @IEEE80211_HW_SUPPORTS_TDLS_BUFFER_STA: Hardware supports buffer STA on
  *	TDLS links.
  *
+ * @IEEE80211_HW_DOESNT_SUPPORT_QOS_NDP: The driver (or firmware) doesn't
+ *	support QoS NDP for AP probing - that's most likely a driver bug.
+ *
  * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays
  */
 enum ieee80211_hw_flags {
@@ -2106,6 +2109,7 @@ enum ieee80211_hw_flags {
 	IEEE80211_HW_REPORTS_LOW_ACK,
 	IEEE80211_HW_SUPPORTS_TX_FRAG,
 	IEEE80211_HW_SUPPORTS_TDLS_BUFFER_STA,
+	IEEE80211_HW_DOESNT_SUPPORT_QOS_NDP,
 
 	/* keep last, obviously */
 	NUM_IEEE80211_HW_FLAGS
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 1f466d12a6bc..94c7ee9df33b 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -212,6 +212,7 @@ static const char *hw_flag_names[] = {
 	FLAG(REPORTS_LOW_ACK),
 	FLAG(SUPPORTS_TX_FRAG),
 	FLAG(SUPPORTS_TDLS_BUFFER_STA),
+	FLAG(DOESNT_SUPPORT_QOS_NDP),
 #undef FLAG
 };
 
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 39b660b9a908..5f303abac5ad 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -896,7 +896,8 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local,
 	struct ieee80211_hdr_3addr *nullfunc;
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 
-	skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif, true);
+	skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif,
+		!ieee80211_hw_check(&local->hw, DOESNT_SUPPORT_QOS_NDP));
 	if (!skb)
 		return;
 
-- 
cgit v1.2.3-59-g8ed1b


From bbc09e7842a5023ba5bc0f8d559b9dd464e44006 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Mon, 19 Mar 2018 15:31:22 +0100
Subject: net/sched: fix idr leak on the error path of tcf_bpf_init()

when the following command sequence is entered

 # tc action add action bpf bytecode '4,40 0 0 12,31 0 1 2048,6 0 0 262144,6 0 0 0' index 100
 RTNETLINK answers: Invalid argument
 We have an error talking to the kernel
 # tc action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' index 100
 RTNETLINK answers: No space left on device
 We have an error talking to the kernel

act_bpf correctly refuses to install the first TC rule, because 31 is not
a valid instruction. However, it refuses to install the second TC rule,
even if the BPF code is correct. Furthermore, it's no more possible to
install any other rule having the same value of 'index' until act_bpf
module is unloaded/inserted again. After the idr has been reserved, call
tcf_idr_release() instead of tcf_idr_cleanup(), to fix this issue.

Fixes: 65a206c01e8e ("net/sched: Change act_api and act_xxx modules to use IDR")
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_bpf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index b3f2c15affa7..9d2cabf1dc7e 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -352,7 +352,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
 	return res;
 out:
 	if (res == ACT_P_CREATED)
-		tcf_idr_cleanup(*act, est);
+		tcf_idr_release(*act, bind);
 
 	return ret;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 60e10b3adc3bac0f6a894c28e0eb1f2d13607362 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Mon, 19 Mar 2018 15:31:23 +0100
Subject: net/sched: fix idr leak in the error path of tcf_simp_init()

if the kernel fails to duplicate 'sdata', creation of a new action fails
with -ENOMEM. However, subsequent attempts to install the same action
using the same value of 'index' systematically fail with -ENOSPC, and
that value of 'index' will no more be usable by act_simple, until rmmod /
insmod of act_simple.ko is done:

 # tc actions add action simple sdata hello index 100
 # tc actions list action simple

        action order 0: Simple <hello>
         index 100 ref 1 bind 0
 # tc actions flush action simple
 # tc actions add action simple sdata hello index 100
 RTNETLINK answers: Cannot allocate memory
 We have an error talking to the kernel
 # tc actions flush action simple
 # tc actions add action simple sdata hello index 100
 RTNETLINK answers: No space left on device
 We have an error talking to the kernel
 # tc actions add action simple sdata hello index 100
 RTNETLINK answers: No space left on device
 We have an error talking to the kernel
 ...

Fix this in the error path of tcf_simp_init(), calling tcf_idr_release()
in place of tcf_idr_cleanup().

Fixes: 65a206c01e8e ("net/sched: Change act_api and act_xxx modules to use IDR")
Suggested-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_simple.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 425eac11f6da..b1f38063ada0 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -121,7 +121,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
 		d = to_defact(*a);
 		ret = alloc_defdata(d, defdata);
 		if (ret < 0) {
-			tcf_idr_cleanup(*a, est);
+			tcf_idr_release(*a, bind);
 			return ret;
 		}
 		d->tcf_action = parm->action;
-- 
cgit v1.2.3-59-g8ed1b


From 5bf7f8185f7c7112decdfe3d3e5c5d5e67f099a1 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Mon, 19 Mar 2018 15:31:24 +0100
Subject: net/sched: fix idr leak in the error path of tcf_act_police_init()

tcf_act_police_init() can fail after the idr has been successfully
reserved (e.g., qdisc_get_rtab() may return NULL). When this happens,
subsequent attempts to configure a police rule using the same idr value
systematiclly fail with -ENOSPC:

 # tc action add action police rate 1000 burst 1000 drop index 100
 RTNETLINK answers: Cannot allocate memory
 We have an error talking to the kernel
 # tc action add action police rate 1000 burst 1000 drop index 100
 RTNETLINK answers: No space left on device
 We have an error talking to the kernel
 # tc action add action police rate 1000 burst 1000 drop index 100
 RTNETLINK answers: No space left on device
 ...

Fix this in the error path of tcf_act_police_init(), calling
tcf_idr_release() in place of tcf_idr_cleanup().

Fixes: 65a206c01e8e ("net/sched: Change act_api and act_xxx modules to use IDR")
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_police.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 95d3c9097b25..faebf82b99f1 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -194,7 +194,7 @@ failure:
 	qdisc_put_rtab(P_tab);
 	qdisc_put_rtab(R_tab);
 	if (ret == ACT_P_CREATED)
-		tcf_idr_cleanup(*a, est);
+		tcf_idr_release(*a, bind);
 	return err;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 94fa3f929ec0c048b1f3658cc335b940df4f6d22 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Mon, 19 Mar 2018 15:31:25 +0100
Subject: net/sched: fix idr leak in the error path of tcp_pedit_init()

tcf_pedit_init() can fail to allocate 'keys' after the idr has been
successfully reserved. When this happens, subsequent attempts to configure
a pedit rule using the same idr value systematically fail with -ENOSPC:

 # tc action add action pedit munge ip ttl set 63 index 100
 RTNETLINK answers: Cannot allocate memory
 We have an error talking to the kernel
 # tc action add action pedit munge ip ttl set 63 index 100
 RTNETLINK answers: No space left on device
 We have an error talking to the kernel
 # tc action add action pedit munge ip ttl set 63 index 100
 RTNETLINK answers: No space left on device
 We have an error talking to the kernel
 ...

Fix this in the error path of tcf_act_pedit_init(), calling
tcf_idr_release() in place of tcf_idr_cleanup().

Fixes: 65a206c01e8e ("net/sched: Change act_api and act_xxx modules to use IDR")
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_pedit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 349beaffb29e..fef08835f26d 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -176,7 +176,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
 		p = to_pedit(*a);
 		keys = kmalloc(ksize, GFP_KERNEL);
 		if (keys == NULL) {
-			tcf_idr_cleanup(*a, est);
+			tcf_idr_release(*a, bind);
 			kfree(keys_ex);
 			return -ENOMEM;
 		}
-- 
cgit v1.2.3-59-g8ed1b


From 1e46ef1762bb2e52f0f996131a4d16ed4e9fd065 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Mon, 19 Mar 2018 15:31:26 +0100
Subject: net/sched: fix idr leak in the error path of __tcf_ipt_init()

__tcf_ipt_init() can fail after the idr has been successfully reserved.
When this happens, subsequent attempts to configure xt/ipt rules using
the same idr value systematically fail with -ENOSPC:

 # tc action add action xt -j LOG --log-prefix test1 index 100
 tablename: mangle hook: NF_IP_POST_ROUTING
         target:  LOG level warning prefix "test1" index 100
 RTNETLINK answers: Cannot allocate memory
 We have an error talking to the kernel
 Command "(null)" is unknown, try "tc actions help".
 # tc action add action xt -j LOG --log-prefix test1 index 100
 tablename: mangle hook: NF_IP_POST_ROUTING
         target:  LOG level warning prefix "test1" index 100
 RTNETLINK answers: No space left on device
 We have an error talking to the kernel
 Command "(null)" is unknown, try "tc actions help".
 # tc action add action xt -j LOG --log-prefix test1 index 100
 tablename: mangle hook: NF_IP_POST_ROUTING
         target:  LOG level warning prefix "test1" index 100
 RTNETLINK answers: No space left on device
 We have an error talking to the kernel
 ...

Fix this in the error path of __tcf_ipt_init(), calling tcf_idr_release()
in place of tcf_idr_cleanup(). Since tcf_ipt_release() can now be called
when tcfi_t is NULL, we also need to protect calls to ipt_destroy_target()
to avoid NULL pointer dereference.

Fixes: 65a206c01e8e ("net/sched: Change act_api and act_xxx modules to use IDR")
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_ipt.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 06e380ae0928..7e06b9b62613 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -80,9 +80,12 @@ static void ipt_destroy_target(struct xt_entry_target *t)
 static void tcf_ipt_release(struct tc_action *a)
 {
 	struct tcf_ipt *ipt = to_ipt(a);
-	ipt_destroy_target(ipt->tcfi_t);
+
+	if (ipt->tcfi_t) {
+		ipt_destroy_target(ipt->tcfi_t);
+		kfree(ipt->tcfi_t);
+	}
 	kfree(ipt->tcfi_tname);
-	kfree(ipt->tcfi_t);
 }
 
 static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = {
@@ -187,7 +190,7 @@ err2:
 	kfree(tname);
 err1:
 	if (ret == ACT_P_CREATED)
-		tcf_idr_cleanup(*a, est);
+		tcf_idr_release(*a, bind);
 	return err;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From d7f20015736048f494988093b59fe513dc232ce9 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Mon, 19 Mar 2018 15:31:27 +0100
Subject: net/sched: fix idr leak in the error path of tcf_vlan_init()

tcf_vlan_init() can fail after the idr has been successfully reserved.
When this happens, every subsequent attempt to configure vlan rules using
the same idr value will systematically fail with -ENOSPC, unless the first
attempt was done using the 'replace' keyword.

 # tc action add action vlan pop index 100
 RTNETLINK answers: Cannot allocate memory
 We have an error talking to the kernel
 # tc action add action vlan pop index 100
 RTNETLINK answers: No space left on device
 We have an error talking to the kernel
 # tc action add action vlan pop index 100
 RTNETLINK answers: No space left on device
 We have an error talking to the kernel
 ...

Fix this in tcf_vlan_init(), ensuring that tcf_idr_release() is called on
the error path when the idr has been reserved, but not yet inserted. Also,
don't test 'ovr' in the error path, to avoid a 'replace' failure implicitly
become a 'delete' that leaks refcount in act_vlan module:

 # rmmod act_vlan; modprobe act_vlan
 # tc action add action vlan push id 5 index 100
 # tc action replace action vlan push id 7 index 100
 RTNETLINK answers: Cannot allocate memory
 We have an error talking to the kernel
 # tc action list action vlan
 #
 # rmmod act_vlan
 rmmod: ERROR: Module act_vlan is in use

Fixes: 4c5b9d9642c8 ("act_vlan: VLAN action rewrite to use RCU lock/unlock and update")
Fixes: 65a206c01e8e ("net/sched: Change act_api and act_xxx modules to use IDR")
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_vlan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index c2914e9a4a6f..c49cb61adedf 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -195,7 +195,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 	ASSERT_RTNL();
 	p = kzalloc(sizeof(*p), GFP_KERNEL);
 	if (!p) {
-		if (ovr)
+		if (ret == ACT_P_CREATED)
 			tcf_idr_release(*a, bind);
 		return -ENOMEM;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From f29cdfbe33d6915ba8056179b0041279a67e3647 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Mon, 19 Mar 2018 15:31:28 +0100
Subject: net/sched: fix idr leak in the error path of tcf_skbmod_init()

tcf_skbmod_init() can fail after the idr has been successfully reserved.
When this happens, every subsequent attempt to configure skbmod rules
using the same idr value will systematically fail with -ENOSPC, unless
the first attempt was done using the 'replace' keyword:

 # tc action add action skbmod swap mac index 100
 RTNETLINK answers: Cannot allocate memory
 We have an error talking to the kernel
 # tc action add action skbmod swap mac index 100
 RTNETLINK answers: No space left on device
 We have an error talking to the kernel
 # tc action add action skbmod swap mac index 100
 RTNETLINK answers: No space left on device
 We have an error talking to the kernel
 ...

Fix this in tcf_skbmod_init(), ensuring that tcf_idr_release() is called
on the error path when the idr has been reserved, but not yet inserted.
Also, don't test 'ovr' in the error path, to avoid a 'replace' failure
implicitly become a 'delete' that leaks refcount in act_skbmod module:

 # rmmod act_skbmod; modprobe act_skbmod
 # tc action add action skbmod swap mac index 100
 # tc action add action skbmod swap mac continue index 100
 RTNETLINK answers: File exists
 We have an error talking to the kernel
 # tc action replace action skbmod swap mac continue index 100
 RTNETLINK answers: Cannot allocate memory
 We have an error talking to the kernel
 # tc action list action skbmod
 #
 # rmmod  act_skbmod
 rmmod: ERROR: Module act_skbmod is in use

Fixes: 65a206c01e8e ("net/sched: Change act_api and act_xxx modules to use IDR")
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_skbmod.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index d09565d6433e..7b0700f52b50 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -152,7 +152,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
 	ASSERT_RTNL();
 	p = kzalloc(sizeof(struct tcf_skbmod_params), GFP_KERNEL);
 	if (unlikely(!p)) {
-		if (ovr)
+		if (ret == ACT_P_CREATED)
 			tcf_idr_release(*a, bind);
 		return -ENOMEM;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 191f86ca8ef27f7a492fd1c03620498c6e94f0ac Mon Sep 17 00:00:00 2001
From: David Lebrun <dlebrun@google.com>
Date: Tue, 20 Mar 2018 14:44:55 +0000
Subject: ipv6: sr: fix scheduling in RCU when creating seg6 lwtunnel state

The seg6_build_state() function is called with RCU read lock held,
so we cannot use GFP_KERNEL. This patch uses GFP_ATOMIC instead.

[   92.770271] =============================
[   92.770628] WARNING: suspicious RCU usage
[   92.770921] 4.16.0-rc4+ #12 Not tainted
[   92.771277] -----------------------------
[   92.771585] ./include/linux/rcupdate.h:302 Illegal context switch in RCU read-side critical section!
[   92.772279]
[   92.772279] other info that might help us debug this:
[   92.772279]
[   92.773067]
[   92.773067] rcu_scheduler_active = 2, debug_locks = 1
[   92.773514] 2 locks held by ip/2413:
[   92.773765]  #0:  (rtnl_mutex){+.+.}, at: [<00000000e5461720>] rtnetlink_rcv_msg+0x441/0x4d0
[   92.774377]  #1:  (rcu_read_lock){....}, at: [<00000000df4f161e>] lwtunnel_build_state+0x59/0x210
[   92.775065]
[   92.775065] stack backtrace:
[   92.775371] CPU: 0 PID: 2413 Comm: ip Not tainted 4.16.0-rc4+ #12
[   92.775791] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1.fc27 04/01/2014
[   92.776608] Call Trace:
[   92.776852]  dump_stack+0x7d/0xbc
[   92.777130]  __schedule+0x133/0xf00
[   92.777393]  ? unwind_get_return_address_ptr+0x50/0x50
[   92.777783]  ? __sched_text_start+0x8/0x8
[   92.778073]  ? rcu_is_watching+0x19/0x30
[   92.778383]  ? kernel_text_address+0x49/0x60
[   92.778800]  ? __kernel_text_address+0x9/0x30
[   92.779241]  ? unwind_get_return_address+0x29/0x40
[   92.779727]  ? pcpu_alloc+0x102/0x8f0
[   92.780101]  _cond_resched+0x23/0x50
[   92.780459]  __mutex_lock+0xbd/0xad0
[   92.780818]  ? pcpu_alloc+0x102/0x8f0
[   92.781194]  ? seg6_build_state+0x11d/0x240
[   92.781611]  ? save_stack+0x9b/0xb0
[   92.781965]  ? __ww_mutex_wakeup_for_backoff+0xf0/0xf0
[   92.782480]  ? seg6_build_state+0x11d/0x240
[   92.782925]  ? lwtunnel_build_state+0x1bd/0x210
[   92.783393]  ? ip6_route_info_create+0x687/0x1640
[   92.783846]  ? ip6_route_add+0x74/0x110
[   92.784236]  ? inet6_rtm_newroute+0x8a/0xd0

Fixes: 6c8702c60b886 ("ipv6: sr: add support for SRH encapsulation and injection with lwtunnels")
Signed-off-by: David Lebrun <dlebrun@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/seg6_iptunnel.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index bd6cc688bd19..8367b859a934 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -418,7 +418,7 @@ static int seg6_build_state(struct nlattr *nla,
 
 	slwt = seg6_lwt_lwtunnel(newts);
 
-	err = dst_cache_init(&slwt->cache, GFP_KERNEL);
+	err = dst_cache_init(&slwt->cache, GFP_ATOMIC);
 	if (err) {
 		kfree(newts);
 		return err;
-- 
cgit v1.2.3-59-g8ed1b


From 8936ef7604c11b5d701580d779e0f5684abc7b68 Mon Sep 17 00:00:00 2001
From: David Lebrun <dlebrun@google.com>
Date: Tue, 20 Mar 2018 14:44:56 +0000
Subject: ipv6: sr: fix NULL pointer dereference when setting encap source
 address

When using seg6 in encap mode, we call ipv6_dev_get_saddr() to set the
source address of the outer IPv6 header, in case none was specified.
Using skb->dev can lead to BUG() when it is in an inconsistent state.
This patch uses the net_device attached to the skb's dst instead.

[940807.667429] BUG: unable to handle kernel NULL pointer dereference at 000000000000047c
[940807.762427] IP: ipv6_dev_get_saddr+0x8b/0x1d0
[940807.815725] PGD 0 P4D 0
[940807.847173] Oops: 0000 [#1] SMP PTI
[940807.890073] Modules linked in:
[940807.927765] CPU: 6 PID: 0 Comm: swapper/6 Tainted: G        W        4.16.0-rc1-seg6bpf+ #2
[940808.028988] Hardware name: HP ProLiant DL120 G6/ProLiant DL120 G6, BIOS O26    09/06/2010
[940808.128128] RIP: 0010:ipv6_dev_get_saddr+0x8b/0x1d0
[940808.187667] RSP: 0018:ffff88043fd836b0 EFLAGS: 00010206
[940808.251366] RAX: 0000000000000005 RBX: ffff88042cb1c860 RCX: 00000000000000fe
[940808.338025] RDX: 00000000000002c0 RSI: ffff88042cb1c860 RDI: 0000000000004500
[940808.424683] RBP: ffff88043fd83740 R08: 0000000000000000 R09: ffffffffffffffff
[940808.511342] R10: 0000000000000040 R11: 0000000000000000 R12: ffff88042cb1c850
[940808.598012] R13: ffffffff8208e380 R14: ffff88042ac8da00 R15: 0000000000000002
[940808.684675] FS:  0000000000000000(0000) GS:ffff88043fd80000(0000) knlGS:0000000000000000
[940808.783036] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[940808.852975] CR2: 000000000000047c CR3: 00000004255fe000 CR4: 00000000000006e0
[940808.939634] Call Trace:
[940808.970041]  <IRQ>
[940808.995250]  ? ip6t_do_table+0x265/0x640
[940809.043341]  seg6_do_srh_encap+0x28f/0x300
[940809.093516]  ? seg6_do_srh+0x1a0/0x210
[940809.139528]  seg6_do_srh+0x1a0/0x210
[940809.183462]  seg6_output+0x28/0x1e0
[940809.226358]  lwtunnel_output+0x3f/0x70
[940809.272370]  ip6_xmit+0x2b8/0x530
[940809.313185]  ? ac6_proc_exit+0x20/0x20
[940809.359197]  inet6_csk_xmit+0x7d/0xc0
[940809.404173]  tcp_transmit_skb+0x548/0x9a0
[940809.453304]  __tcp_retransmit_skb+0x1a8/0x7a0
[940809.506603]  ? ip6_default_advmss+0x40/0x40
[940809.557824]  ? tcp_current_mss+0x24/0x90
[940809.605925]  tcp_retransmit_skb+0xd/0x80
[940809.654016]  tcp_xmit_retransmit_queue.part.17+0xf9/0x210
[940809.719797]  tcp_ack+0xa47/0x1110
[940809.760612]  tcp_rcv_established+0x13c/0x570
[940809.812865]  tcp_v6_do_rcv+0x151/0x3d0
[940809.858879]  tcp_v6_rcv+0xa5c/0xb10
[940809.901770]  ? seg6_output+0xdd/0x1e0
[940809.946745]  ip6_input_finish+0xbb/0x460
[940809.994837]  ip6_input+0x74/0x80
[940810.034612]  ? ip6_rcv_finish+0xb0/0xb0
[940810.081663]  ipv6_rcv+0x31c/0x4c0
...

Fixes: 6c8702c60b886 ("ipv6: sr: add support for SRH encapsulation and injection with lwtunnels")
Reported-by: Tom Herbert <tom@quantonium.net>
Signed-off-by: David Lebrun <dlebrun@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/seg6_iptunnel.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 8367b859a934..7a78dcfda68a 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -93,7 +93,8 @@ static void set_tun_src(struct net *net, struct net_device *dev,
 /* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */
 int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
 {
-	struct net *net = dev_net(skb_dst(skb)->dev);
+	struct dst_entry *dst = skb_dst(skb);
+	struct net *net = dev_net(dst->dev);
 	struct ipv6hdr *hdr, *inner_hdr;
 	struct ipv6_sr_hdr *isrh;
 	int hdrlen, tot_len, err;
@@ -134,7 +135,7 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
 	isrh->nexthdr = proto;
 
 	hdr->daddr = isrh->segments[isrh->first_segment];
-	set_tun_src(net, skb->dev, &hdr->daddr, &hdr->saddr);
+	set_tun_src(net, ip6_dst_idev(dst)->dev, &hdr->daddr, &hdr->saddr);
 
 #ifdef CONFIG_IPV6_SEG6_HMAC
 	if (sr_has_hmac(isrh)) {
-- 
cgit v1.2.3-59-g8ed1b


From 68e2ffdeb5dbf54bc3a0684aa4e73c6db8675eed Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Tue, 20 Mar 2018 10:06:59 -0700
Subject: net/ipv6: Handle onlink flag with multipath routes

For multipath routes the ONLINK flag can be specified per nexthop in
rtnh_flags or globally in rtm_flags. Update ip6_route_multipath_add
to consider the ONLINK setting coming from rtnh_flags. Each loop over
nexthops the config for the sibling route is initialized to the global
config and then per nexthop settings overlayed. The flag is 'or'ed into
fib6_config to handle the ONLINK flag coming from either rtm_flags or
rtnh_flags.

Fixes: fc1e64e1092f ("net/ipv6: Add support for onlink flag")
Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index b3c1137ad0b8..b0d5c64e1978 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -4112,6 +4112,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
 				r_cfg.fc_encap_type = nla_get_u16(nla);
 		}
 
+		r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
 		rt = ip6_route_info_create(&r_cfg, extack);
 		if (IS_ERR(rt)) {
 			err = PTR_ERR(rt);
-- 
cgit v1.2.3-59-g8ed1b