From d6990976af7c5d8f55903bfb4289b6fb030bf754 Mon Sep 17 00:00:00 2001
From: Eyal Birger <eyal.birger@gmail.com>
Date: Thu, 7 Jun 2018 10:11:02 +0300
Subject: vti6: fix PMTU caching and reporting on xmit

When setting the skb->dst before doing the MTU check, the route PMTU
caching and reporting is done on the new dst which is about to be
released.

Instead, PMTU handling should be done using the original dst.

This is aligned with IPv4 VTI.

Fixes: ccd740cbc6 ("vti6: Add pmtu handling to vti6_xmit.")
Signed-off-by: Eyal Birger <eyal.birger@gmail.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/ipv6/ip6_vti.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index b7f28deddaea..c72ae3a4fe09 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -480,10 +480,6 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
 		goto tx_err_dst_release;
 	}
 
-	skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev)));
-	skb_dst_set(skb, dst);
-	skb->dev = skb_dst(skb)->dev;
-
 	mtu = dst_mtu(dst);
 	if (!skb->ignore_df && skb->len > mtu) {
 		skb_dst_update_pmtu(skb, mtu);
@@ -498,9 +494,14 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
 				  htonl(mtu));
 		}
 
-		return -EMSGSIZE;
+		err = -EMSGSIZE;
+		goto tx_err_dst_release;
 	}
 
+	skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev)));
+	skb_dst_set(skb, dst);
+	skb->dev = skb_dst(skb)->dev;
+
 	err = dst_output(t->net, skb->sk, skb);
 	if (net_xmit_eval(err) == 0) {
 		struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
-- 
cgit v1.2.3-59-g8ed1b


From 45c180bc29babbedd6b8c01b975780ef44d9d09c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 18 Jun 2018 21:35:07 -0700
Subject: xfrm_user: prevent leaking 2 bytes of kernel memory

struct xfrm_userpolicy_type has two holes, so we should not
use C99 style initializer.

KMSAN report:

BUG: KMSAN: kernel-infoleak in copyout lib/iov_iter.c:140 [inline]
BUG: KMSAN: kernel-infoleak in _copy_to_iter+0x1b14/0x2800 lib/iov_iter.c:571
CPU: 1 PID: 4520 Comm: syz-executor841 Not tainted 4.17.0+ #5
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x185/0x1d0 lib/dump_stack.c:113
 kmsan_report+0x188/0x2a0 mm/kmsan/kmsan.c:1117
 kmsan_internal_check_memory+0x138/0x1f0 mm/kmsan/kmsan.c:1211
 kmsan_copy_to_user+0x7a/0x160 mm/kmsan/kmsan.c:1253
 copyout lib/iov_iter.c:140 [inline]
 _copy_to_iter+0x1b14/0x2800 lib/iov_iter.c:571
 copy_to_iter include/linux/uio.h:106 [inline]
 skb_copy_datagram_iter+0x422/0xfa0 net/core/datagram.c:431
 skb_copy_datagram_msg include/linux/skbuff.h:3268 [inline]
 netlink_recvmsg+0x6f1/0x1900 net/netlink/af_netlink.c:1959
 sock_recvmsg_nosec net/socket.c:802 [inline]
 sock_recvmsg+0x1d6/0x230 net/socket.c:809
 ___sys_recvmsg+0x3fe/0x810 net/socket.c:2279
 __sys_recvmmsg+0x58e/0xe30 net/socket.c:2391
 do_sys_recvmmsg+0x2a6/0x3e0 net/socket.c:2472
 __do_sys_recvmmsg net/socket.c:2485 [inline]
 __se_sys_recvmmsg net/socket.c:2481 [inline]
 __x64_sys_recvmmsg+0x15d/0x1c0 net/socket.c:2481
 do_syscall_64+0x15b/0x230 arch/x86/entry/common.c:287
 entry_SYSCALL_64_after_hwframe+0x44/0xa9
RIP: 0033:0x446ce9
RSP: 002b:00007fc307918db8 EFLAGS: 00000293 ORIG_RAX: 000000000000012b
RAX: ffffffffffffffda RBX: 00000000006dbc24 RCX: 0000000000446ce9
RDX: 000000000000000a RSI: 0000000020005040 RDI: 0000000000000003
RBP: 00000000006dbc20 R08: 0000000020004e40 R09: 0000000000000000
R10: 0000000040000000 R11: 0000000000000293 R12: 0000000000000000
R13: 00007ffc8d2df32f R14: 00007fc3079199c0 R15: 0000000000000001

Uninit was stored to memory at:
 kmsan_save_stack_with_flags mm/kmsan/kmsan.c:279 [inline]
 kmsan_save_stack mm/kmsan/kmsan.c:294 [inline]
 kmsan_internal_chain_origin+0x12b/0x210 mm/kmsan/kmsan.c:685
 kmsan_memcpy_origins+0x11d/0x170 mm/kmsan/kmsan.c:527
 __msan_memcpy+0x109/0x160 mm/kmsan/kmsan_instr.c:413
 __nla_put lib/nlattr.c:569 [inline]
 nla_put+0x276/0x340 lib/nlattr.c:627
 copy_to_user_policy_type net/xfrm/xfrm_user.c:1678 [inline]
 dump_one_policy+0xbe1/0x1090 net/xfrm/xfrm_user.c:1708
 xfrm_policy_walk+0x45a/0xd00 net/xfrm/xfrm_policy.c:1013
 xfrm_dump_policy+0x1c0/0x2a0 net/xfrm/xfrm_user.c:1749
 netlink_dump+0x9b5/0x1550 net/netlink/af_netlink.c:2226
 __netlink_dump_start+0x1131/0x1270 net/netlink/af_netlink.c:2323
 netlink_dump_start include/linux/netlink.h:214 [inline]
 xfrm_user_rcv_msg+0x8a3/0x9b0 net/xfrm/xfrm_user.c:2577
 netlink_rcv_skb+0x37e/0x600 net/netlink/af_netlink.c:2448
 xfrm_netlink_rcv+0xb2/0xf0 net/xfrm/xfrm_user.c:2598
 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline]
 netlink_unicast+0x1680/0x1750 net/netlink/af_netlink.c:1336
 netlink_sendmsg+0x104f/0x1350 net/netlink/af_netlink.c:1901
 sock_sendmsg_nosec net/socket.c:629 [inline]
 sock_sendmsg net/socket.c:639 [inline]
 ___sys_sendmsg+0xec8/0x1320 net/socket.c:2117
 __sys_sendmsg net/socket.c:2155 [inline]
 __do_sys_sendmsg net/socket.c:2164 [inline]
 __se_sys_sendmsg net/socket.c:2162 [inline]
 __x64_sys_sendmsg+0x331/0x460 net/socket.c:2162
 do_syscall_64+0x15b/0x230 arch/x86/entry/common.c:287
 entry_SYSCALL_64_after_hwframe+0x44/0xa9
Local variable description: ----upt.i@dump_one_policy
Variable was created at:
 dump_one_policy+0x78/0x1090 net/xfrm/xfrm_user.c:1689
 xfrm_policy_walk+0x45a/0xd00 net/xfrm/xfrm_policy.c:1013

Byte 130 of 137 is uninitialized
Memory access starts at ffff88019550407f

Fixes: c0144beaeca42 ("[XFRM] netlink: Use nla_put()/NLA_PUT() variantes")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_user.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 080035f056d9..1e50b70ad668 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1671,9 +1671,11 @@ static inline unsigned int userpolicy_type_attrsize(void)
 #ifdef CONFIG_XFRM_SUB_POLICY
 static int copy_to_user_policy_type(u8 type, struct sk_buff *skb)
 {
-	struct xfrm_userpolicy_type upt = {
-		.type = type,
-	};
+	struct xfrm_userpolicy_type upt;
+
+	/* Sadly there are two holes in struct xfrm_userpolicy_type */
+	memset(&upt, 0, sizeof(upt));
+	upt.type = type;
 
 	return nla_put(skb, XFRMA_POLICY_TYPE, sizeof(upt), &upt);
 }
-- 
cgit v1.2.3-59-g8ed1b


From 8cc88773855f988d6a3bbf102bbd9dd9c828eb81 Mon Sep 17 00:00:00 2001
From: Tommi Rantala <tommi.t.rantala@nokia.com>
Date: Thu, 21 Jun 2018 09:30:47 +0300
Subject: xfrm: fix missing dst_release() after policy blocking lbcast and
 multicast

Fix missing dst_release() when local broadcast or multicast traffic is
xfrm policy blocked.

For IPv4 this results to dst leak: ip_route_output_flow() allocates
dst_entry via __ip_route_output_key() and passes it to
xfrm_lookup_route(). xfrm_lookup returns ERR_PTR(-EPERM) that is
propagated. The dst that was allocated is never released.

IPv4 local broadcast testcase:
 ping -b 192.168.1.255 &
 sleep 1
 ip xfrm policy add src 0.0.0.0/0 dst 192.168.1.255/32 dir out action block

IPv4 multicast testcase:
 ping 224.0.0.1 &
 sleep 1
 ip xfrm policy add src 0.0.0.0/0 dst 224.0.0.1/32 dir out action block

For IPv6 the missing dst_release() causes trouble e.g. when used in netns:
 ip netns add TEST
 ip netns exec TEST ip link set lo up
 ip link add dummy0 type dummy
 ip link set dev dummy0 netns TEST
 ip netns exec TEST ip addr add fd00::1111 dev dummy0
 ip netns exec TEST ip link set dummy0 up
 ip netns exec TEST ping -6 -c 5 ff02::1%dummy0 &
 sleep 1
 ip netns exec TEST ip xfrm policy add src ::/0 dst ff02::1 dir out action block
 wait
 ip netns del TEST

After netns deletion we see:
[  258.239097] unregister_netdevice: waiting for lo to become free. Usage count = 2
[  268.279061] unregister_netdevice: waiting for lo to become free. Usage count = 2
[  278.367018] unregister_netdevice: waiting for lo to become free. Usage count = 2
[  288.375259] unregister_netdevice: waiting for lo to become free. Usage count = 2

Fixes: ac37e2515c1a ("xfrm: release dst_orig in case of error in xfrm_lookup()")
Signed-off-by: Tommi Rantala <tommi.t.rantala@nokia.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_policy.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 5f48251c1319..7c5e8978aeaa 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2286,6 +2286,9 @@ struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig,
 	if (IS_ERR(dst) && PTR_ERR(dst) == -EREMOTE)
 		return make_blackhole(net, dst_orig->ops->family, dst_orig);
 
+	if (IS_ERR(dst))
+		dst_release(dst_orig);
+
 	return dst;
 }
 EXPORT_SYMBOL(xfrm_lookup_route);
-- 
cgit v1.2.3-59-g8ed1b


From 86126b77dcd551ce223e7293bb55854e3df05646 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 25 Jun 2018 14:00:07 +0200
Subject: xfrm: free skb if nlsk pointer is NULL

nlmsg_multicast() always frees the skb, so in case we cannot call
it we must do that ourselves.

Fixes: 21ee543edc0dea ("xfrm: fix race between netns cleanup and state expire notification")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_user.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 1e50b70ad668..33878e6e0d0a 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1025,10 +1025,12 @@ static inline int xfrm_nlmsg_multicast(struct net *net, struct sk_buff *skb,
 {
 	struct sock *nlsk = rcu_dereference(net->xfrm.nlsk);
 
-	if (nlsk)
-		return nlmsg_multicast(nlsk, skb, pid, group, GFP_ATOMIC);
-	else
-		return -1;
+	if (!nlsk) {
+		kfree_skb(skb);
+		return -EPIPE;
+	}
+
+	return nlmsg_multicast(nlsk, skb, pid, group, GFP_ATOMIC);
 }
 
 static inline unsigned int xfrm_spdinfo_msgsize(void)
-- 
cgit v1.2.3-59-g8ed1b


From 7284fdf39a912322ce97de2d30def3c6068a418c Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Wed, 27 Jun 2018 11:49:28 +0800
Subject: esp6: fix memleak on error path in esp6_input

This ought to be an omission in e6194923237 ("esp: Fix memleaks on error
paths."). The memleak on error path in esp6_input is similar to esp_input
of esp4.

Fixes: e6194923237 ("esp: Fix memleaks on error paths.")
Fixes: 3f29770723f ("ipsec: check return value of skb_to_sgvec always")
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/ipv6/esp6.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 97513f35bcc5..88a7579c23bd 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -669,8 +669,10 @@ skip_cow:
 
 	sg_init_table(sg, nfrags);
 	ret = skb_to_sgvec(skb, sg, 0, skb->len);
-	if (unlikely(ret < 0))
+	if (unlikely(ret < 0)) {
+		kfree(tmp);
 		goto out;
+	}
 
 	skb->ip_summed = CHECKSUM_NONE;
 
-- 
cgit v1.2.3-59-g8ed1b


From 9aee40006190a3cda9a4d2dbae71e92617c8c362 Mon Sep 17 00:00:00 2001
From: Lawrence Brakmo <brakmo@fb.com>
Date: Mon, 23 Jul 2018 17:49:39 -0700
Subject: tcp: ack immediately when a cwr packet arrives

We observed high 99 and 99.9% latencies when doing RPCs with DCTCP. The
problem is triggered when the last packet of a request arrives CE
marked. The reply will carry the ECE mark causing TCP to shrink its cwnd
to 1 (because there are no packets in flight). When the 1st packet of
the next request arrives, the ACK was sometimes delayed even though it
is CWR marked, adding up to 40ms to the RPC latency.

This patch insures that CWR marked data packets arriving will be acked
immediately.

Packetdrill script to reproduce the problem:

0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
0.000 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
0.000 setsockopt(3, SOL_TCP, TCP_CONGESTION, "dctcp", 5) = 0
0.000 bind(3, ..., ...) = 0
0.000 listen(3, 1) = 0

0.100 < [ect0] SEW 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
0.100 > SE. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
0.110 < [ect0] . 1:1(0) ack 1 win 257
0.200 accept(3, ..., ...) = 4

0.200 < [ect0] . 1:1001(1000) ack 1 win 257
0.200 > [ect01] . 1:1(0) ack 1001

0.200 write(4, ..., 1) = 1
0.200 > [ect01] P. 1:2(1) ack 1001

0.200 < [ect0] . 1001:2001(1000) ack 2 win 257
0.200 write(4, ..., 1) = 1
0.200 > [ect01] P. 2:3(1) ack 2001

0.200 < [ect0] . 2001:3001(1000) ack 3 win 257
0.200 < [ect0] . 3001:4001(1000) ack 3 win 257
0.200 > [ect01] . 3:3(0) ack 4001

0.210 < [ce] P. 4001:4501(500) ack 3 win 257

+0.001 read(4, ..., 4500) = 4500
+0 write(4, ..., 1) = 1
+0 > [ect01] PE. 3:4(1) ack 4501

+0.010 < [ect0] W. 4501:5501(1000) ack 4 win 257
// Previously the ACK sequence below would be 4501, causing a long RTO
+0.040~+0.045 > [ect01] . 4:4(0) ack 5501   // delayed ack

+0.311 < [ect0] . 5501:6501(1000) ack 4 win 257  // More data
+0 > [ect01] . 4:4(0) ack 6501     // now acks everything

+0.500 < F. 9501:9501(0) ack 4 win 257

Modified based on comments by Neal Cardwell <ncardwell@google.com>

Signed-off-by: Lawrence Brakmo <brakmo@fb.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3bcd30a2ba06..f9dcb29be12d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -246,8 +246,15 @@ static void tcp_ecn_queue_cwr(struct tcp_sock *tp)
 
 static void tcp_ecn_accept_cwr(struct tcp_sock *tp, const struct sk_buff *skb)
 {
-	if (tcp_hdr(skb)->cwr)
+	if (tcp_hdr(skb)->cwr) {
 		tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
+
+		/* If the sender is telling us it has entered CWR, then its
+		 * cwnd may be very low (even just 1 packet), so we should ACK
+		 * immediately.
+		 */
+		tcp_enter_quickack_mode((struct sock *)tp, 2);
+	}
 }
 
 static void tcp_ecn_withdraw_cwr(struct tcp_sock *tp)
-- 
cgit v1.2.3-59-g8ed1b


From b87bac1012c483462e7776c7b7320b659dbb3295 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Wed, 25 Jul 2018 06:06:13 +0000
Subject: net: igmp: make function __ip_mc_inc_group() static

Fixes the following sparse warnings:

net/ipv4/igmp.c:1391:6: warning:
 symbol '__ip_mc_inc_group' was not declared. Should it be static?

Fixes: 6e2059b53f98 ("ipv4/igmp: init group mode as INCLUDE when join source group")
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/igmp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 28fef7d15959..75151be21413 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1387,7 +1387,8 @@ static void ip_mc_hash_remove(struct in_device *in_dev,
 /*
  *	A socket has joined a multicast group on device dev.
  */
-void __ip_mc_inc_group(struct in_device *in_dev, __be32 addr, unsigned int mode)
+static void __ip_mc_inc_group(struct in_device *in_dev, __be32 addr,
+			      unsigned int mode)
 {
 	struct ip_mc_list *im;
 #ifdef CONFIG_IP_MULTICAST
-- 
cgit v1.2.3-59-g8ed1b


From d24458e43b103c7eb7b2fd57bcac392fd7750438 Mon Sep 17 00:00:00 2001
From: Björn Töpel <bjorn.topel@intel.com>
Date: Mon, 23 Jul 2018 11:43:03 +0200
Subject: xsk: fix poll/POLLIN premature returns
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Polling for the ingress queues relies on reading the producer/consumer
pointers of the Rx queue.

Prior this commit, a cached consumer pointer could be used, instead of
the actual consumer pointer and therefore report POLLIN prematurely.

This patch makes sure that the non-cached consumer pointer is used
instead.

Reported-by: Qi Zhang <qi.z.zhang@intel.com>
Tested-by: Qi Zhang <qi.z.zhang@intel.com>
Fixes: c497176cb2e4 ("xsk: add Rx receive functions and poll support")
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 net/xdp/xsk_queue.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 52ecaf770642..8a64b150be54 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -250,7 +250,7 @@ static inline bool xskq_full_desc(struct xsk_queue *q)
 
 static inline bool xskq_empty_desc(struct xsk_queue *q)
 {
-	return xskq_nb_free(q, q->prod_tail, 1) == q->nentries;
+	return xskq_nb_free(q, q->prod_tail, q->nentries) == q->nentries;
 }
 
 void xskq_set_umem(struct xsk_queue *q, struct xdp_umem_props *umem_props);
-- 
cgit v1.2.3-59-g8ed1b


From 7effaf06c3cdef6855e127886c7405b9ab62f90d Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Tue, 24 Jul 2018 14:12:20 +0300
Subject: net: rollback orig value on failure of dev_qdisc_change_tx_queue_len

Fix dev_change_tx_queue_len so it rolls back original value
upon a failure in dev_qdisc_change_tx_queue_len.
This is already done for notifirers' failures, share the code.

In case of failure in dev_qdisc_change_tx_queue_len, some tx queues
would still be of the new length, while they should be reverted.
Currently, the revert is not done, and is marked with a TODO label
in dev_qdisc_change_tx_queue_len, and should find some nice solution
to do it.
Yet it is still better to not apply the newly requested value.

Fixes: 48bfd55e7e41 ("net_sched: plug in qdisc ops change_tx_queue_len")
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Reviewed-by: Eran Ben Elisha <eranbe@mellanox.com>
Reported-by: Ran Rozenstein <ranro@mellanox.com>
Cc: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index a5aa1c7444e6..559a91271f82 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -7149,16 +7149,19 @@ int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len)
 		dev->tx_queue_len = new_len;
 		res = call_netdevice_notifiers(NETDEV_CHANGE_TX_QUEUE_LEN, dev);
 		res = notifier_to_errno(res);
-		if (res) {
-			netdev_err(dev,
-				   "refused to change device tx_queue_len\n");
-			dev->tx_queue_len = orig_len;
-			return res;
-		}
-		return dev_qdisc_change_tx_queue_len(dev);
+		if (res)
+			goto err_rollback;
+		res = dev_qdisc_change_tx_queue_len(dev);
+		if (res)
+			goto err_rollback;
 	}
 
 	return 0;
+
+err_rollback:
+	netdev_err(dev, "refused to change device tx_queue_len\n");
+	dev->tx_queue_len = orig_len;
+	return res;
 }
 
 /**
-- 
cgit v1.2.3-59-g8ed1b


From 9e630bcb7701f94dbd729fe57d37c089c763ad9f Mon Sep 17 00:00:00 2001
From: Avinash Repaka <avinash.repaka@oracle.com>
Date: Tue, 24 Jul 2018 20:31:58 -0700
Subject: RDS: RDMA: Fix the NULL-ptr deref in rds_ib_get_mr

Registration of a memory region(MR) through FRMR/fastreg(unlike FMR)
needs a connection/qp. With a proxy qp, this dependency on connection
will be removed, but that needs more infrastructure patches, which is a
work in progress.

As an intermediate fix, the get_mr returns EOPNOTSUPP when connection
details are not populated. The MR registration through sendmsg() will
continue to work even with fast registration, since connection in this
case is formed upfront.

This patch fixes the following crash:
kasan: GPF could be caused by NULL-ptr deref or user memory access
general protection fault: 0000 [#1] SMP KASAN
Modules linked in:
CPU: 1 PID: 4244 Comm: syzkaller468044 Not tainted 4.16.0-rc6+ #361
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
Google 01/01/2011
RIP: 0010:rds_ib_get_mr+0x5c/0x230 net/rds/ib_rdma.c:544
RSP: 0018:ffff8801b059f890 EFLAGS: 00010202
RAX: dffffc0000000000 RBX: ffff8801b07e1300 RCX: ffffffff8562d96e
RDX: 000000000000000d RSI: 0000000000000001 RDI: 0000000000000068
RBP: ffff8801b059f8b8 R08: ffffed0036274244 R09: ffff8801b13a1200
R10: 0000000000000004 R11: ffffed0036274243 R12: ffff8801b13a1200
R13: 0000000000000001 R14: ffff8801ca09fa9c R15: 0000000000000000
FS:  00007f4d050af700(0000) GS:ffff8801db300000(0000)
knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f4d050aee78 CR3: 00000001b0d9b006 CR4: 00000000001606e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
 __rds_rdma_map+0x710/0x1050 net/rds/rdma.c:271
 rds_get_mr_for_dest+0x1d4/0x2c0 net/rds/rdma.c:357
 rds_setsockopt+0x6cc/0x980 net/rds/af_rds.c:347
 SYSC_setsockopt net/socket.c:1849 [inline]
 SyS_setsockopt+0x189/0x360 net/socket.c:1828
 do_syscall_64+0x281/0x940 arch/x86/entry/common.c:287
 entry_SYSCALL_64_after_hwframe+0x42/0xb7
RIP: 0033:0x4456d9
RSP: 002b:00007f4d050aedb8 EFLAGS: 00000246 ORIG_RAX: 0000000000000036
RAX: ffffffffffffffda RBX: 00000000006dac3c RCX: 00000000004456d9
RDX: 0000000000000007 RSI: 0000000000000114 RDI: 0000000000000004
RBP: 00000000006dac38 R08: 00000000000000a0 R09: 0000000000000000
R10: 0000000020000380 R11: 0000000000000246 R12: 0000000000000000
R13: 00007fffbfb36d6f R14: 00007f4d050af9c0 R15: 0000000000000005
Code: fa 48 c1 ea 03 80 3c 02 00 0f 85 cc 01 00 00 4c 8b bb 80 04 00 00
48
b8 00 00 00 00 00 fc ff df 49 8d 7f 68 48 89 fa 48 c1 ea 03 <80> 3c 02
00 0f
85 9c 01 00 00 4d 8b 7f 68 48 b8 00 00 00 00 00
RIP: rds_ib_get_mr+0x5c/0x230 net/rds/ib_rdma.c:544 RSP:
ffff8801b059f890
---[ end trace 7e1cea13b85473b0 ]---

Reported-by: syzbot+b51c77ef956678a65834@syzkaller.appspotmail.com
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: Avinash Repaka <avinash.repaka@oracle.com>

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/ib_frmr.c |  5 +++++
 net/rds/ib_mr.h   |  3 ++-
 net/rds/ib_rdma.c | 21 +++++++++++++--------
 net/rds/rdma.c    | 13 ++++++++-----
 net/rds/rds.h     |  5 ++++-
 net/rds/send.c    | 12 +++++++-----
 6 files changed, 39 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c
index 48332a6ed738..d152e48ea371 100644
--- a/net/rds/ib_frmr.c
+++ b/net/rds/ib_frmr.c
@@ -344,6 +344,11 @@ struct rds_ib_mr *rds_ib_reg_frmr(struct rds_ib_device *rds_ibdev,
 	struct rds_ib_frmr *frmr;
 	int ret;
 
+	if (!ic) {
+		/* TODO: Add FRWR support for RDS_GET_MR using proxy qp*/
+		return ERR_PTR(-EOPNOTSUPP);
+	}
+
 	do {
 		if (ibmr)
 			rds_ib_free_frmr(ibmr, true);
diff --git a/net/rds/ib_mr.h b/net/rds/ib_mr.h
index 0ea4ab017a8c..655f01d427fe 100644
--- a/net/rds/ib_mr.h
+++ b/net/rds/ib_mr.h
@@ -115,7 +115,8 @@ void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev,
 			struct rds_info_rdma_connection *iinfo);
 void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *);
 void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
-		    struct rds_sock *rs, u32 *key_ret);
+		    struct rds_sock *rs, u32 *key_ret,
+		    struct rds_connection *conn);
 void rds_ib_sync_mr(void *trans_private, int dir);
 void rds_ib_free_mr(void *trans_private, int invalidate);
 void rds_ib_flush_mrs(void);
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index e678699268a2..2e49a40a5e11 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -537,11 +537,12 @@ void rds_ib_flush_mrs(void)
 }
 
 void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
-		    struct rds_sock *rs, u32 *key_ret)
+		    struct rds_sock *rs, u32 *key_ret,
+		    struct rds_connection *conn)
 {
 	struct rds_ib_device *rds_ibdev;
 	struct rds_ib_mr *ibmr = NULL;
-	struct rds_ib_connection *ic = rs->rs_conn->c_transport_data;
+	struct rds_ib_connection *ic = NULL;
 	int ret;
 
 	rds_ibdev = rds_ib_get_device(rs->rs_bound_addr);
@@ -550,6 +551,9 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
 		goto out;
 	}
 
+	if (conn)
+		ic = conn->c_transport_data;
+
 	if (!rds_ibdev->mr_8k_pool || !rds_ibdev->mr_1m_pool) {
 		ret = -ENODEV;
 		goto out;
@@ -559,17 +563,18 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
 		ibmr = rds_ib_reg_frmr(rds_ibdev, ic, sg, nents, key_ret);
 	else
 		ibmr = rds_ib_reg_fmr(rds_ibdev, sg, nents, key_ret);
-	if (ibmr)
-		rds_ibdev = NULL;
-
- out:
-	if (!ibmr)
+	if (IS_ERR(ibmr)) {
+		ret = PTR_ERR(ibmr);
 		pr_warn("RDS/IB: rds_ib_get_mr failed (errno=%d)\n", ret);
+	} else {
+		return ibmr;
+	}
 
+ out:
 	if (rds_ibdev)
 		rds_ib_dev_put(rds_ibdev);
 
-	return ibmr;
+	return ERR_PTR(ret);
 }
 
 void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool)
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 634cfcb7bba6..80920e47f2c7 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -170,7 +170,8 @@ static int rds_pin_pages(unsigned long user_addr, unsigned int nr_pages,
 }
 
 static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
-				u64 *cookie_ret, struct rds_mr **mr_ret)
+			  u64 *cookie_ret, struct rds_mr **mr_ret,
+			  struct rds_conn_path *cp)
 {
 	struct rds_mr *mr = NULL, *found;
 	unsigned int nr_pages;
@@ -269,7 +270,8 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
 	 * Note that dma_map() implies that pending writes are
 	 * flushed to RAM, so no dma_sync is needed here. */
 	trans_private = rs->rs_transport->get_mr(sg, nents, rs,
-						 &mr->r_key);
+						 &mr->r_key,
+						 cp ? cp->cp_conn : NULL);
 
 	if (IS_ERR(trans_private)) {
 		for (i = 0 ; i < nents; i++)
@@ -330,7 +332,7 @@ int rds_get_mr(struct rds_sock *rs, char __user *optval, int optlen)
 			   sizeof(struct rds_get_mr_args)))
 		return -EFAULT;
 
-	return __rds_rdma_map(rs, &args, NULL, NULL);
+	return __rds_rdma_map(rs, &args, NULL, NULL, NULL);
 }
 
 int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen)
@@ -354,7 +356,7 @@ int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen)
 	new_args.cookie_addr = args.cookie_addr;
 	new_args.flags = args.flags;
 
-	return __rds_rdma_map(rs, &new_args, NULL, NULL);
+	return __rds_rdma_map(rs, &new_args, NULL, NULL, NULL);
 }
 
 /*
@@ -782,7 +784,8 @@ int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm,
 	    rm->m_rdma_cookie != 0)
 		return -EINVAL;
 
-	return __rds_rdma_map(rs, CMSG_DATA(cmsg), &rm->m_rdma_cookie, &rm->rdma.op_rdma_mr);
+	return __rds_rdma_map(rs, CMSG_DATA(cmsg), &rm->m_rdma_cookie,
+			      &rm->rdma.op_rdma_mr, rm->m_conn_path);
 }
 
 /*
diff --git a/net/rds/rds.h b/net/rds/rds.h
index f2272fb8cd45..60b3b787fbdb 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -464,6 +464,8 @@ struct rds_message {
 			struct scatterlist	*op_sg;
 		} data;
 	};
+
+	struct rds_conn_path *m_conn_path;
 };
 
 /*
@@ -544,7 +546,8 @@ struct rds_transport {
 					unsigned int avail);
 	void (*exit)(void);
 	void *(*get_mr)(struct scatterlist *sg, unsigned long nr_sg,
-			struct rds_sock *rs, u32 *key_ret);
+			struct rds_sock *rs, u32 *key_ret,
+			struct rds_connection *conn);
 	void (*sync_mr)(void *trans_private, int direction);
 	void (*free_mr)(void *trans_private, int invalidate);
 	void (*flush_mrs)(void);
diff --git a/net/rds/send.c b/net/rds/send.c
index 94c7f74909be..59f17a2335f4 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -1169,6 +1169,13 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
 		rs->rs_conn = conn;
 	}
 
+	if (conn->c_trans->t_mp_capable)
+		cpath = &conn->c_path[rds_send_mprds_hash(rs, conn)];
+	else
+		cpath = &conn->c_path[0];
+
+	rm->m_conn_path = cpath;
+
 	/* Parse any control messages the user may have included. */
 	ret = rds_cmsg_send(rs, rm, msg, &allocated_mr);
 	if (ret) {
@@ -1192,11 +1199,6 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
 		goto out;
 	}
 
-	if (conn->c_trans->t_mp_capable)
-		cpath = &conn->c_path[rds_send_mprds_hash(rs, conn)];
-	else
-		cpath = &conn->c_path[0];
-
 	if (rds_destroy_pending(conn)) {
 		ret = -EAGAIN;
 		goto out;
-- 
cgit v1.2.3-59-g8ed1b


From 36e0f12bbfd3016f495904b35e41c5711707509f Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Thu, 26 Jul 2018 23:17:03 +0900
Subject: xdp: add NULL pointer check in __xdp_return()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

rhashtable_lookup() can return NULL. so that NULL pointer
check routine should be added.

Fixes: 02b55e5657c3 ("xdp: add MEM_TYPE_ZERO_COPY")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 net/core/xdp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/xdp.c b/net/core/xdp.c
index 9d1f22072d5d..6771f1855b96 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -345,7 +345,8 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
 		rcu_read_lock();
 		/* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */
 		xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
-		xa->zc_alloc->free(xa->zc_alloc, handle);
+		if (!WARN_ON_ONCE(!xa))
+			xa->zc_alloc->free(xa->zc_alloc, handle);
 		rcu_read_unlock();
 	default:
 		/* Not possible, checked in xdp_rxq_info_reg_mem_model() */
-- 
cgit v1.2.3-59-g8ed1b


From 3eee1f75f2b9c107d4a097e8b640553376a5b171 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sat, 28 Jul 2018 00:17:56 +0200
Subject: bpf: fix bpf_skb_load_bytes_relative pkt length check

The len > skb_headlen(skb) cannot be used as a maximum upper bound
for the packet length since it does not have any relation to the full
linear packet length when filtering is used from upper layers (e.g.
in case of reuseport BPF programs) as by then skb->data, skb->len
already got mangled through __skb_pull() and others.

Fixes: 4e1ec56cdc59 ("bpf: add skb_load_bytes_relative helper")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
---
 net/core/filter.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/core/filter.c b/net/core/filter.c
index 06da770f543f..9dfd145eedcc 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1712,24 +1712,26 @@ static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
 BPF_CALL_5(bpf_skb_load_bytes_relative, const struct sk_buff *, skb,
 	   u32, offset, void *, to, u32, len, u32, start_header)
 {
+	u8 *end = skb_tail_pointer(skb);
+	u8 *net = skb_network_header(skb);
+	u8 *mac = skb_mac_header(skb);
 	u8 *ptr;
 
-	if (unlikely(offset > 0xffff || len > skb_headlen(skb)))
+	if (unlikely(offset > 0xffff || len > (end - mac)))
 		goto err_clear;
 
 	switch (start_header) {
 	case BPF_HDR_START_MAC:
-		ptr = skb_mac_header(skb) + offset;
+		ptr = mac + offset;
 		break;
 	case BPF_HDR_START_NET:
-		ptr = skb_network_header(skb) + offset;
+		ptr = net + offset;
 		break;
 	default:
 		goto err_clear;
 	}
 
-	if (likely(ptr >= skb_mac_header(skb) &&
-		   ptr + len <= skb_tail_pointer(skb))) {
+	if (likely(ptr >= mac && ptr + len <= end)) {
 		memcpy(to, ptr, len);
 		return 0;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 71eb5255f55bdb484d35ff7c9a1803f453dfbf82 Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Sun, 29 Jul 2018 00:28:31 +0900
Subject: bpf: use GFP_ATOMIC instead of GFP_KERNEL in bpf_parse_prog()

bpf_parse_prog() is protected by rcu_read_lock().
so that GFP_KERNEL is not allowed in the bpf_parse_prog().

[51015.579396] =============================
[51015.579418] WARNING: suspicious RCU usage
[51015.579444] 4.18.0-rc6+ #208 Not tainted
[51015.579464] -----------------------------
[51015.579488] ./include/linux/rcupdate.h:303 Illegal context switch in RCU read-side critical section!
[51015.579510] other info that might help us debug this:
[51015.579532] rcu_scheduler_active = 2, debug_locks = 1
[51015.579556] 2 locks held by ip/1861:
[51015.579577]  #0: 00000000a8c12fd1 (rtnl_mutex){+.+.}, at: rtnetlink_rcv_msg+0x2e0/0x910
[51015.579711]  #1: 00000000bf815f8e (rcu_read_lock){....}, at: lwtunnel_build_state+0x96/0x390
[51015.579842] stack backtrace:
[51015.579869] CPU: 0 PID: 1861 Comm: ip Not tainted 4.18.0-rc6+ #208
[51015.579891] Hardware name: To be filled by O.E.M. To be filled by O.E.M./Aptio CRB, BIOS 5.6.5 07/08/2015
[51015.579911] Call Trace:
[51015.579950]  dump_stack+0x74/0xbb
[51015.580000]  ___might_sleep+0x16b/0x3a0
[51015.580047]  __kmalloc_track_caller+0x220/0x380
[51015.580077]  kmemdup+0x1c/0x40
[51015.580077]  bpf_parse_prog+0x10e/0x230
[51015.580164]  ? kasan_kmalloc+0xa0/0xd0
[51015.580164]  ? bpf_destroy_state+0x30/0x30
[51015.580164]  ? bpf_build_state+0xe2/0x3e0
[51015.580164]  bpf_build_state+0x1bb/0x3e0
[51015.580164]  ? bpf_parse_prog+0x230/0x230
[51015.580164]  ? lock_is_held_type+0x123/0x1a0
[51015.580164]  lwtunnel_build_state+0x1aa/0x390
[51015.580164]  fib_create_info+0x1579/0x33d0
[51015.580164]  ? sched_clock_local+0xe2/0x150
[51015.580164]  ? fib_info_update_nh_saddr+0x1f0/0x1f0
[51015.580164]  ? sched_clock_local+0xe2/0x150
[51015.580164]  fib_table_insert+0x201/0x1990
[51015.580164]  ? lock_downgrade+0x610/0x610
[51015.580164]  ? fib_table_lookup+0x1920/0x1920
[51015.580164]  ? lwtunnel_valid_encap_type.part.6+0xcb/0x3a0
[51015.580164]  ? rtm_to_fib_config+0x637/0xbd0
[51015.580164]  inet_rtm_newroute+0xed/0x1b0
[51015.580164]  ? rtm_to_fib_config+0xbd0/0xbd0
[51015.580164]  rtnetlink_rcv_msg+0x331/0x910
[ ... ]

Fixes: 3a0af8fd61f9 ("bpf: BPF for lightweight tunnel infrastructure")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 net/core/lwt_bpf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index e7e626fb87bb..e45098593dc0 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -217,7 +217,7 @@ static int bpf_parse_prog(struct nlattr *attr, struct bpf_lwt_prog *prog,
 	if (!tb[LWT_BPF_PROG_FD] || !tb[LWT_BPF_PROG_NAME])
 		return -EINVAL;
 
-	prog->name = nla_memdup(tb[LWT_BPF_PROG_NAME], GFP_KERNEL);
+	prog->name = nla_memdup(tb[LWT_BPF_PROG_NAME], GFP_ATOMIC);
 	if (!prog->name)
 		return -ENOMEM;
 
-- 
cgit v1.2.3-59-g8ed1b


From 9fc12023d6f51551d6ca9ed7e02ecc19d79caf17 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Date: Fri, 27 Jul 2018 18:15:46 +0200
Subject: ipv4: remove BUG_ON() from fib_compute_spec_dst

Remove BUG_ON() from fib_compute_spec_dst routine and check
in_dev pointer during flowi4 data structure initialization.
fib_compute_spec_dst routine can be run concurrently with device removal
where ip_ptr net_device pointer is set to NULL. This can happen
if userspace enables pkt info on UDP rx socket and the device
is removed while traffic is flowing

Fixes: 35ebf65e851c ("ipv4: Create and use fib_compute_spec_dst() helper")
Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_frontend.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index e46cdd310e5f..2998b0e47d4b 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -292,19 +292,19 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
 		return ip_hdr(skb)->daddr;
 
 	in_dev = __in_dev_get_rcu(dev);
-	BUG_ON(!in_dev);
 
 	net = dev_net(dev);
 
 	scope = RT_SCOPE_UNIVERSE;
 	if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) {
+		bool vmark = in_dev && IN_DEV_SRC_VMARK(in_dev);
 		struct flowi4 fl4 = {
 			.flowi4_iif = LOOPBACK_IFINDEX,
 			.flowi4_oif = l3mdev_master_ifindex_rcu(dev),
 			.daddr = ip_hdr(skb)->saddr,
 			.flowi4_tos = RT_TOS(ip_hdr(skb)->tos),
 			.flowi4_scope = scope,
-			.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0,
+			.flowi4_mark = vmark ? skb->mark : 0,
 		};
 		if (!fib_lookup(net, &fl4, &res, 0))
 			return FIB_RES_PREFSRC(net, res);
-- 
cgit v1.2.3-59-g8ed1b


From c8e8cd579bb4265651df8223730105341e61a2d1 Mon Sep 17 00:00:00 2001
From: Jeremy Cline <jcline@redhat.com>
Date: Fri, 27 Jul 2018 22:43:01 +0000
Subject: net: socket: fix potential spectre v1 gadget in socketcall

'call' is a user-controlled value, so sanitize the array index after the
bounds check to avoid speculating past the bounds of the 'nargs' array.

Found with the help of Smatch:

net/socket.c:2508 __do_sys_socketcall() warn: potential spectre issue
'nargs' [r] (local cap)

Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Jeremy Cline <jcline@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/socket.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/socket.c b/net/socket.c
index 85633622c94d..4ac3b834cce9 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -89,6 +89,7 @@
 #include <linux/magic.h>
 #include <linux/slab.h>
 #include <linux/xattr.h>
+#include <linux/nospec.h>
 
 #include <linux/uaccess.h>
 #include <asm/unistd.h>
@@ -2522,6 +2523,7 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
 
 	if (call < 1 || call > SYS_SENDMMSG)
 		return -EINVAL;
+	call = array_index_nospec(call, SYS_SENDMMSG + 1);
 
 	len = nargs[call];
 	if (len > sizeof(a))
-- 
cgit v1.2.3-59-g8ed1b


From e978de7a6d382ec378830ca2cf38e902df0b6d84 Mon Sep 17 00:00:00 2001
From: Jeremy Cline <jcline@redhat.com>
Date: Fri, 27 Jul 2018 22:43:02 +0000
Subject: net: socket: Fix potential spectre v1 gadget in sock_is_registered

'family' can be a user-controlled value, so sanitize it after the bounds
check to avoid speculative out-of-bounds access.

Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Jeremy Cline <jcline@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/socket.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/socket.c b/net/socket.c
index 4ac3b834cce9..8c24d5dc4bc8 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2690,7 +2690,8 @@ EXPORT_SYMBOL(sock_unregister);
 
 bool sock_is_registered(int family)
 {
-	return family < NPROTO && rcu_access_pointer(net_families[family]);
+	return family < NPROTO &&
+		rcu_access_pointer(net_families[array_index_nospec(family, NPROTO)]);
 }
 
 static int __init sock_init(void)
-- 
cgit v1.2.3-59-g8ed1b


From 383d470936c05554219094a4d364d964cb324827 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Fri, 27 Jul 2018 17:19:12 -0400
Subject: tcp_bbr: fix bw probing to raise in-flight data for very small BDPs

For some very small BDPs (with just a few packets) there was a
quantization effect where the target number of packets in flight
during the super-unity-gain (1.25x) phase of gain cycling was
implicitly truncated to a number of packets no larger than the normal
unity-gain (1.0x) phase of gain cycling. This meant that in multi-flow
scenarios some flows could get stuck with a lower bandwidth, because
they did not push enough packets inflight to discover that there was
more bandwidth available. This was really only an issue in multi-flow
LAN scenarios, where RTTs and BDPs are low enough for this to be an
issue.

This fix ensures that gain cycling can raise inflight for small BDPs
by ensuring that in PROBE_BW mode target inflight values with a
super-unity gain are always greater than inflight values with a gain
<= 1. Importantly, this applies whether the inflight value is
calculated for use as a cwnd value, or as a target inflight value for
the end of the super-unity phase in bbr_is_next_cycle_phase() (both
need to be bigger to ensure we can probe with more packets in flight
reliably).

This is a candidate fix for stable releases.

Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control")
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Priyaranjan Jha <priyarjha@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_bbr.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 58e2f479ffb4..4bfff3c87e8e 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -354,6 +354,10 @@ static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain)
 	/* Reduce delayed ACKs by rounding up cwnd to the next even number. */
 	cwnd = (cwnd + 1) & ~1U;
 
+	/* Ensure gain cycling gets inflight above BDP even for small BDPs. */
+	if (bbr->mode == BBR_PROBE_BW && gain > BBR_UNIT)
+		cwnd += 2;
+
 	return cwnd;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 7acf9d4237c46894e0fa0492dd96314a41742e84 Mon Sep 17 00:00:00 2001
From: Dmitry Safonov <dima@arista.com>
Date: Fri, 27 Jul 2018 16:54:44 +0100
Subject: netlink: Do not subscribe to non-existent groups

Make ABI more strict about subscribing to group > ngroups.
Code doesn't check for that and it looks bogus.
(one can subscribe to non-existing group)
Still, it's possible to bind() to all possible groups with (-1)

Cc: "David S. Miller" <davem@davemloft.net>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Cc: netdev@vger.kernel.org
Signed-off-by: Dmitry Safonov <dima@arista.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/af_netlink.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 393573a99a5a..ac805caed2e2 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1008,6 +1008,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
 		if (err)
 			return err;
 	}
+	groups &= (1UL << nlk->ngroups) - 1;
 
 	bound = nlk->bound;
 	if (bound) {
-- 
cgit v1.2.3-59-g8ed1b


From 25432eba9cd8f2ef5afef55be811b010a004b5fa Mon Sep 17 00:00:00 2001
From: Justin Pettit <jpettit@ovn.org>
Date: Sat, 28 Jul 2018 15:26:01 -0700
Subject: openvswitch: meter: Fix setting meter id for new entries

The meter code would create an entry for each new meter.  However, it
would not set the meter id in the new entry, so every meter would appear
to have a meter id of zero.  This commit properly sets the meter id when
adding the entry.

Fixes: 96fbc13d7e77 ("openvswitch: Add meter infrastructure")
Signed-off-by: Justin Pettit <jpettit@ovn.org>
Cc: Andy Zhou <azhou@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/meter.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index b891a91577f8..c038e021a591 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -211,6 +211,7 @@ static struct dp_meter *dp_meter_create(struct nlattr **a)
 	if (!meter)
 		return ERR_PTR(-ENOMEM);
 
+	meter->id = nla_get_u32(a[OVS_METER_ATTR_ID]);
 	meter->used = div_u64(ktime_get_ns(), 1000 * 1000);
 	meter->kbps = a[OVS_METER_ATTR_KBPS] ? 1 : 0;
 	meter->keep_stats = !a[OVS_METER_ATTR_CLEAR];
@@ -280,6 +281,10 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
 	u32 meter_id;
 	bool failed;
 
+	if (!a[OVS_METER_ATTR_ID]) {
+		return -ENODEV;
+	}
+
 	meter = dp_meter_create(a);
 	if (IS_ERR_OR_NULL(meter))
 		return PTR_ERR(meter);
@@ -298,11 +303,6 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
 		goto exit_unlock;
 	}
 
-	if (!a[OVS_METER_ATTR_ID]) {
-		err = -ENODEV;
-		goto exit_unlock;
-	}
-
 	meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
 
 	/* Cannot fail after this. */
-- 
cgit v1.2.3-59-g8ed1b


From df18b50448fab1dff093731dfd0e25e77e1afcd1 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Mon, 30 Jul 2018 16:23:10 +0200
Subject: net/ipv6: fix metrics leak

Since commit d4ead6b34b67 ("net/ipv6: move metrics from dst to
rt6_info"), ipv6 metrics are shared and refcounted. rt6_set_from()
assigns the rt->from pointer and increases the refcount on from's
metrics. This reference is never released.

Introduce the fib6_metrics_release() helper and use it to release the
metrics.

Fixes: d4ead6b34b67 ("net/ipv6: move metrics from dst to rt6_info")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_fib.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index d212738e9d10..211a2d437b56 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -167,11 +167,22 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags)
 	return f6i;
 }
 
+static void fib6_metrics_release(struct fib6_info *f6i)
+{
+	struct dst_metrics *m;
+
+	if (!f6i)
+		return;
+
+	m = f6i->fib6_metrics;
+	if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt))
+		kfree(m);
+}
+
 void fib6_info_destroy_rcu(struct rcu_head *head)
 {
 	struct fib6_info *f6i = container_of(head, struct fib6_info, rcu);
 	struct rt6_exception_bucket *bucket;
-	struct dst_metrics *m;
 
 	WARN_ON(f6i->fib6_node);
 
@@ -201,9 +212,7 @@ void fib6_info_destroy_rcu(struct rcu_head *head)
 	if (f6i->fib6_nh.nh_dev)
 		dev_put(f6i->fib6_nh.nh_dev);
 
-	m = f6i->fib6_metrics;
-	if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt))
-		kfree(m);
+	fib6_metrics_release(f6i);
 
 	kfree(f6i);
 }
@@ -887,6 +896,7 @@ static void fib6_drop_pcpu_from(struct fib6_info *f6i,
 
 			from = rcu_dereference_protected(pcpu_rt->from,
 					     lockdep_is_held(&table->tb6_lock));
+			fib6_metrics_release(from);
 			rcu_assign_pointer(pcpu_rt->from, NULL);
 			fib6_info_release(from);
 		}
-- 
cgit v1.2.3-59-g8ed1b


From 61f4b23769f0cc72ae62c9a81cf08f0397d40da8 Mon Sep 17 00:00:00 2001
From: Dmitry Safonov <dima@arista.com>
Date: Mon, 30 Jul 2018 18:32:36 +0100
Subject: netlink: Don't shift with UB on nlk->ngroups

On i386 nlk->ngroups might be 32 or 0. Which leads to UB, resulting in
hang during boot.
Check for 0 ngroups and use (unsigned long long) as a type to shift.

Fixes: 7acf9d4237c4 ("netlink: Do not subscribe to non-existent groups").
Reported-by: kernel test robot <rong.a.chen@intel.com>
Signed-off-by: Dmitry Safonov <dima@arista.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/af_netlink.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index ac805caed2e2..7d860a22e5fb 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1008,7 +1008,11 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
 		if (err)
 			return err;
 	}
-	groups &= (1UL << nlk->ngroups) - 1;
+
+	if (nlk->ngroups == 0)
+		groups = 0;
+	else
+		groups &= (1ULL << nlk->ngroups) - 1;
 
 	bound = nlk->bound;
 	if (bound) {
-- 
cgit v1.2.3-59-g8ed1b


From 56e2c94f055d328f5f6b0a5c1721cca2f2d4e0a1 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 30 Jul 2018 20:09:11 -0700
Subject: inet: frag: enforce memory limits earlier

We currently check current frags memory usage only when
a new frag queue is created. This allows attackers to first
consume the memory budget (default : 4 MB) creating thousands
of frag queues, then sending tiny skbs to exceed high_thresh
limit by 2 to 3 order of magnitude.

Note that before commit 648700f76b03 ("inet: frags: use rhashtables
for reassembly units"), work queue could be starved under DOS,
getting no cpu cycles.
After commit 648700f76b03, only the per frag queue timer can eventually
remove an incomplete frag queue and its skbs.

Fixes: b13d3cbfb8e8 ("inet: frag: move eviction of queues to work queue")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Jann Horn <jannh@google.com>
Cc: Florian Westphal <fw@strlen.de>
Cc: Peter Oskolkov <posk@google.com>
Cc: Paolo Abeni <pabeni@redhat.com>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_fragment.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 1e4cf3ab560f..0d70608cc2e1 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -157,9 +157,6 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
 {
 	struct inet_frag_queue *q;
 
-	if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh)
-		return NULL;
-
 	q = kmem_cache_zalloc(f->frags_cachep, GFP_ATOMIC);
 	if (!q)
 		return NULL;
@@ -204,6 +201,9 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key)
 {
 	struct inet_frag_queue *fq;
 
+	if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh)
+		return NULL;
+
 	rcu_read_lock();
 
 	fq = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params);
-- 
cgit v1.2.3-59-g8ed1b


From 4672694bd4f1aebdab0ad763ae4716e89cb15221 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 30 Jul 2018 21:50:29 -0700
Subject: ipv4: frags: handle possible skb truesize change

ip_frag_queue() might call pskb_pull() on one skb that
is already in the fragment queue.

We need to take care of possible truesize change, or we
might have an imbalance of the netns frags memory usage.

IPv6 is immune to this bug, because RFC5722, Section 4,
amended by Errata ID 3089 states :

  When reassembling an IPv6 datagram, if
  one or more its constituent fragments is determined to be an
  overlapping fragment, the entire datagram (and any constituent
  fragments) MUST be silently discarded.

Fixes: 158f323b9868 ("net: adjust skb->truesize in pskb_expand_head()")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_fragment.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 8e9528ebaa8e..d14d741fb05e 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -383,11 +383,16 @@ found:
 		int i = end - next->ip_defrag_offset; /* overlap is 'i' bytes */
 
 		if (i < next->len) {
+			int delta = -next->truesize;
+
 			/* Eat head of the next overlapped fragment
 			 * and leave the loop. The next ones cannot overlap.
 			 */
 			if (!pskb_pull(next, i))
 				goto err;
+			delta += next->truesize;
+			if (delta)
+				add_frag_mem_limit(qp->q.net, delta);
 			next->ip_defrag_offset += i;
 			qp->q.meat -= i;
 			if (next->ip_summed != CHECKSUM_UNNECESSARY)
-- 
cgit v1.2.3-59-g8ed1b


From bc5b6c0b62b932626a135f516a41838c510c6eba Mon Sep 17 00:00:00 2001
From: Jeremy Cline <jcline@redhat.com>
Date: Tue, 31 Jul 2018 21:13:16 +0000
Subject: netlink: Fix spectre v1 gadget in netlink_create()

'protocol' is a user-controlled value, so sanitize it after the bounds
check to avoid using it for speculative out-of-bounds access to arrays
indexed by it.

This addresses the following accesses detected with the help of smatch:

* net/netlink/af_netlink.c:654 __netlink_create() warn: potential
  spectre issue 'nlk_cb_mutex_keys' [w]

* net/netlink/af_netlink.c:654 __netlink_create() warn: potential
  spectre issue 'nlk_cb_mutex_key_strings' [w]

* net/netlink/af_netlink.c:685 netlink_create() warn: potential spectre
  issue 'nl_table' [w] (local cap)

Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Jeremy Cline <jcline@redhat.com>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/af_netlink.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 7d860a22e5fb..c09d16870f74 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -63,6 +63,7 @@
 #include <linux/hash.h>
 #include <linux/genetlink.h>
 #include <linux/net_namespace.h>
+#include <linux/nospec.h>
 
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
@@ -679,6 +680,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
 
 	if (protocol < 0 || protocol >= MAX_LINKS)
 		return -EPROTONOSUPPORT;
+	protocol = array_index_nospec(protocol, MAX_LINKS);
 
 	netlink_lock_table();
 #ifdef CONFIG_MODULES
-- 
cgit v1.2.3-59-g8ed1b


From a94c689e6c9e72e722f28339e12dff191ee5a265 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 31 Jul 2018 17:12:52 -0700
Subject: net: dsa: Do not suspend/resume closed slave_dev

If a DSA slave network device was previously disabled, there is no need
to suspend or resume it.

Fixes: 2446254915a7 ("net: dsa: allow switch drivers to implement suspend/resume hooks")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/slave.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'net')

diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 1e3b6a6d8a40..732369c80644 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1248,6 +1248,9 @@ int dsa_slave_suspend(struct net_device *slave_dev)
 {
 	struct dsa_port *dp = dsa_slave_to_port(slave_dev);
 
+	if (!netif_running(slave_dev))
+		return 0;
+
 	netif_device_detach(slave_dev);
 
 	rtnl_lock();
@@ -1261,6 +1264,9 @@ int dsa_slave_resume(struct net_device *slave_dev)
 {
 	struct dsa_port *dp = dsa_slave_to_port(slave_dev);
 
+	if (!netif_running(slave_dev))
+		return 0;
+
 	netif_device_attach(slave_dev);
 
 	rtnl_lock();
-- 
cgit v1.2.3-59-g8ed1b


From c01f6c9b3207e52fc9973a066a856ddf7a0538d8 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Wed, 1 Aug 2018 13:27:23 +0100
Subject: rxrpc: Fix user call ID check in rxrpc_service_prealloc_one

There just check the user call ID isn't already in use, hence should
compare user_call_ID with xcall->user_call_ID, which is current
node's user_call_ID.

Fixes: 540b1c48c37a ("rxrpc: Fix deadlock between call creation and sendmsg/recvmsg")
Suggested-by: David Howells <dhowells@redhat.com>
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rxrpc/call_accept.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index a9a9be5519b9..9d1e298b784c 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -116,9 +116,9 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
 		while (*pp) {
 			parent = *pp;
 			xcall = rb_entry(parent, struct rxrpc_call, sock_node);
-			if (user_call_ID < call->user_call_ID)
+			if (user_call_ID < xcall->user_call_ID)
 				pp = &(*pp)->rb_left;
-			else if (user_call_ID > call->user_call_ID)
+			else if (user_call_ID > xcall->user_call_ID)
 				pp = &(*pp)->rb_right;
 			else
 				goto id_in_use;
-- 
cgit v1.2.3-59-g8ed1b


From e6aed040eafb4ce1881bbc59a225f6b27d250396 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 1 Aug 2018 21:32:30 -0700
Subject: Revert "net/ipv6: fix metrics leak"

This reverts commit df18b50448fab1dff093731dfd0e25e77e1afcd1.

This change causes other problems and use-after-free situations as
found by syzbot.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_fib.c | 18 ++++--------------
 1 file changed, 4 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 211a2d437b56..d212738e9d10 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -167,22 +167,11 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags)
 	return f6i;
 }
 
-static void fib6_metrics_release(struct fib6_info *f6i)
-{
-	struct dst_metrics *m;
-
-	if (!f6i)
-		return;
-
-	m = f6i->fib6_metrics;
-	if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt))
-		kfree(m);
-}
-
 void fib6_info_destroy_rcu(struct rcu_head *head)
 {
 	struct fib6_info *f6i = container_of(head, struct fib6_info, rcu);
 	struct rt6_exception_bucket *bucket;
+	struct dst_metrics *m;
 
 	WARN_ON(f6i->fib6_node);
 
@@ -212,7 +201,9 @@ void fib6_info_destroy_rcu(struct rcu_head *head)
 	if (f6i->fib6_nh.nh_dev)
 		dev_put(f6i->fib6_nh.nh_dev);
 
-	fib6_metrics_release(f6i);
+	m = f6i->fib6_metrics;
+	if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt))
+		kfree(m);
 
 	kfree(f6i);
 }
@@ -896,7 +887,6 @@ static void fib6_drop_pcpu_from(struct fib6_info *f6i,
 
 			from = rcu_dereference_protected(pcpu_rt->from,
 					     lockdep_is_held(&table->tb6_lock));
-			fib6_metrics_release(from);
 			rcu_assign_pointer(pcpu_rt->from, NULL);
 			fib6_info_release(from);
 		}
-- 
cgit v1.2.3-59-g8ed1b