From 04c2cf34362f133be09878bd752f8b014318b59a Mon Sep 17 00:00:00 2001
From: Naftali Goldstein <naftali.goldstein@intel.com>
Date: Tue, 11 Jul 2017 10:07:25 +0300
Subject: mac80211: add api to start ba session timer expired flow

Some drivers handle rx buffer reordering internally (and by extension
handle also the rx ba session timer internally), but do not ofload the
addba/delba negotiation.
Add an api for these drivers to properly tear-down the ba session,
including sending a delba.

Signed-off-by: Naftali Goldstein <naftali.goldstein@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 net/mac80211/agg-rx.c | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 8708cbe8af5b..2b36eff5d97e 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -7,7 +7,7 @@
  * Copyright 2006-2007	Jiri Benc <jbenc@suse.cz>
  * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
  * Copyright 2007-2010, Intel Corporation
- * Copyright(c) 2015 Intel Deutschland GmbH
+ * Copyright(c) 2015-2017 Intel Deutschland GmbH
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -466,3 +466,23 @@ void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif,
 	rcu_read_unlock();
 }
 EXPORT_SYMBOL(ieee80211_manage_rx_ba_offl);
+
+void ieee80211_rx_ba_timer_expired(struct ieee80211_vif *vif,
+				   const u8 *addr, unsigned int tid)
+{
+	struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+	struct ieee80211_local *local = sdata->local;
+	struct sta_info *sta;
+
+	rcu_read_lock();
+	sta = sta_info_get_bss(sdata, addr);
+	if (!sta)
+		goto unlock;
+
+	set_bit(tid, sta->ampdu_mlme.tid_rx_timer_expired);
+	ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work);
+
+ unlock:
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL(ieee80211_rx_ba_timer_expired);
-- 
cgit v1.2.3-59-g8ed1b


From e71cb9e00922902ba0519f37d09145f117dc02b3 Mon Sep 17 00:00:00 2001
From: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Date: Wed, 9 Aug 2017 16:46:09 -0400
Subject: net: dsa: ksz: fix skb freeing

The DSA layer frees the original skb when an xmit function returns NULL,
meaning an error occurred. But if the tagging code copied the original
skb, it is responsible of freeing the copy if an error occurs.

The ksz tagging code currently has two issues: if skb_put_padto fails,
the skb copy is not freed, and the original skb will be freed twice.

To fix that, move skb_put_padto inside both branches of the skb_tailroom
condition, before freeing the original skb, and free the copy on error.

Signed-off-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Reviewed-by: Woojung Huh <woojung.huh@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/tag_ksz.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
index fab41de8e983..de66ca8e6201 100644
--- a/net/dsa/tag_ksz.c
+++ b/net/dsa/tag_ksz.c
@@ -42,6 +42,9 @@ static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev)
 	padlen = (skb->len >= ETH_ZLEN) ? 0 : ETH_ZLEN - skb->len;
 
 	if (skb_tailroom(skb) >= padlen + KSZ_INGRESS_TAG_LEN) {
+		if (skb_put_padto(skb, skb->len + padlen))
+			return NULL;
+
 		nskb = skb;
 	} else {
 		nskb = alloc_skb(NET_IP_ALIGN + skb->len +
@@ -56,13 +59,15 @@ static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev)
 		skb_set_transport_header(nskb,
 					 skb_transport_header(skb) - skb->head);
 		skb_copy_and_csum_dev(skb, skb_put(nskb, skb->len));
+
+		if (skb_put_padto(nskb, nskb->len + padlen)) {
+			kfree_skb(nskb);
+			return NULL;
+		}
+
 		kfree_skb(skb);
 	}
 
-	/* skb is freed when it fails */
-	if (skb_put_padto(nskb, nskb->len + padlen))
-		return NULL;
-
 	tag = skb_put(nskb, KSZ_INGRESS_TAG_LEN);
 	tag[0] = 0;
 	tag[1] = 1 << p->dp->index; /* destination port */
-- 
cgit v1.2.3-59-g8ed1b


From 8d55373875052b891ae72c9bcaf9c2d7178676c0 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Date: Thu, 10 Aug 2017 12:31:40 +0300
Subject: net/sched/hfsc: allocate tcf block for hfsc root class

Without this filters cannot be attached.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Fixes: 6529eaba33f0 ("net: sched: introduce tcf block infractructure")
Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_hfsc.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'net')

diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index b52f74610dc7..3ad02bbe6903 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1428,6 +1428,10 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
 		return err;
 	q->eligible = RB_ROOT;
 
+	err = tcf_block_get(&q->root.block, &q->root.filter_list);
+	if (err)
+		goto err_tcf;
+
 	q->root.cl_common.classid = sch->handle;
 	q->root.refcnt  = 1;
 	q->root.sched   = q;
@@ -1447,6 +1451,10 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
 	qdisc_watchdog_init(&q->watchdog, sch);
 
 	return 0;
+
+err_tcf:
+	qdisc_class_hash_destroy(&q->clhash);
+	return err;
 }
 
 static int
-- 
cgit v1.2.3-59-g8ed1b


From 2ed46ce45ec02f6b2188419acdf372a144e06fb5 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 11 Aug 2017 18:31:25 +0200
Subject: bpf: fix two missing target_size settings in bpf_convert_ctx_access

When CONFIG_NET_SCHED or CONFIG_NET_RX_BUSY_POLL is /not/ set and
we try a narrow __sk_buff load of tc_index or napi_id, respectively,
then verifier rightfully complains that it's misconfigured, because
we need to set target_size in each of the two cases. The rewrite
for the ctx access is just a dummy op, but needs to pass, so fix
this up.

Fixes: f96da09473b5 ("bpf: simplify narrower ctx access")
Reported-by: Shubham Bansal <illusionist.neo@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/filter.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/core/filter.c b/net/core/filter.c
index f44fc22fd45a..6280a602604c 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3505,6 +3505,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
 					      bpf_target_off(struct sk_buff, tc_index, 2,
 							     target_size));
 #else
+		*target_size = 2;
 		if (type == BPF_WRITE)
 			*insn++ = BPF_MOV64_REG(si->dst_reg, si->dst_reg);
 		else
@@ -3520,6 +3521,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
 		*insn++ = BPF_JMP_IMM(BPF_JGE, si->dst_reg, MIN_NAPI_ID, 1);
 		*insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
 #else
+		*target_size = 4;
 		*insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
 #endif
 		break;
-- 
cgit v1.2.3-59-g8ed1b


From 2c87d63ac853550e734edfd45e1be5e5aa44fbcc Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 14 Aug 2017 00:52:58 +0200
Subject: ipv4: route: fix inet_rtm_getroute induced crash

"ip route get $daddr iif eth0 from $saddr" causes:
 BUG: KASAN: use-after-free in ip_route_input_rcu+0x1535/0x1b50
 Call Trace:
  ip_route_input_rcu+0x1535/0x1b50
  ip_route_input_noref+0xf9/0x190
  tcp_v4_early_demux+0x1a4/0x2b0
  ip_rcv+0xbcb/0xc05
  __netif_receive_skb+0x9c/0xd0
  netif_receive_skb_internal+0x5a8/0x890

Problem is that inet_rtm_getroute calls either ip_route_input_rcu (if an
iif was provided) or ip_route_output_key_hash_rcu.

But ip_route_input_rcu, unlike ip_route_output_key_hash_rcu, already
associates the dst_entry with the skb.  This clears the SKB_DST_NOREF
bit (i.e. skb_dst_drop will release/free the entry while it should not).

Thus only set the dst if we called ip_route_output_key_hash_rcu().

I tested this patch by running:
 while true;do ip r get 10.0.1.2;done > /dev/null &
 while true;do ip r get 10.0.1.2 iif eth0  from 10.0.1.1;done > /dev/null &
... and saw no crash or memory leak.

Cc: Roopa Prabhu <roopa@cumulusnetworks.com>
Cc: David Ahern <dsahern@gmail.com>
Fixes: ba52d61e0ff ("ipv4: route: restore skb_dst_set in inet_rtm_getroute")
Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 0383e66f59bc..7effa62beed3 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2750,12 +2750,13 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 		err = 0;
 		if (IS_ERR(rt))
 			err = PTR_ERR(rt);
+		else
+			skb_dst_set(skb, &rt->dst);
 	}
 
 	if (err)
 		goto errout_free;
 
-	skb_dst_set(skb, &rt->dst);
 	if (rtm->rtm_flags & RTM_F_NOTIFY)
 		rt->rt_flags |= RTCF_NOTIFY;
 
-- 
cgit v1.2.3-59-g8ed1b


From fed5f5718c4989a03b1b4cdc0c7f273c3c74ee9e Mon Sep 17 00:00:00 2001
From: Jon Paul Maloy <jon.maloy@ericsson.com>
Date: Mon, 14 Aug 2017 17:55:56 +0200
Subject: tipc: accept PACKET_MULTICAST packets

On L2 bearers, the TIPC broadcast function is sending out packets using
the corresponding L2 broadcast address. At reception, we filter such
packets under the assumption that they will also be delivered as
broadcast packets.

This assumption doesn't always hold true. Under high load, we have seen
that a switch may convert the destination address and deliver the packet
as a PACKET_MULTICAST, something leading to inadvertently dropped
packets and a stale and reset broadcast link.

We fix this by extending the reception filtering to accept packets of
type PACKET_MULTICAST.

Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/bearer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index d174ee3254ee..767e0537dde5 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -596,7 +596,7 @@ static int tipc_l2_rcv_msg(struct sk_buff *skb, struct net_device *dev,
 	rcu_read_lock();
 	b = rcu_dereference_rtnl(dev->tipc_ptr);
 	if (likely(b && test_bit(0, &b->up) &&
-		   (skb->pkt_type <= PACKET_BROADCAST))) {
+		   (skb->pkt_type <= PACKET_MULTICAST))) {
 		skb->next = NULL;
 		tipc_rcv(dev_net(dev), skb, b);
 		rcu_read_unlock();
-- 
cgit v1.2.3-59-g8ed1b


From 59a361bc6f6e91d57f25ff0aebb0e646beb3b41d Mon Sep 17 00:00:00 2001
From: Jon Paul Maloy <jon.maloy@ericsson.com>
Date: Mon, 14 Aug 2017 18:28:49 +0200
Subject: tipc: avoid inheriting msg_non_seq flag when message is returned

In the function msg_reverse(), we reverse the header while trying to
reuse the original buffer whenever possible. Those rejected/returned
messages are always transmitted as unicast, but the msg_non_seq field
is not explicitly set to zero as it should be.

We have seen cases where multicast senders set the message type to
"NOT dest_droppable", meaning that a multicast message shorter than
one MTU will be returned, e.g., during receive buffer overflow, by
reusing the original buffer. This has the effect that even the
'msg_non_seq' field is inadvertently inherited by the rejected message,
although it is now sent as a unicast message. This again leads the
receiving unicast link endpoint to steer the packet toward the broadcast
link receive function, where it is dropped. The affected unicast link is
thereafter (after 100 failed retransmissions) declared 'stale' and
reset.

We fix this by unconditionally setting the 'msg_non_seq' flag to zero
for all rejected/returned messages.

Reported-by: Canh Duc Luu <canh.d.luu@dektech.com.au>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/msg.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index ab3087687a32..dcd90e6fa7c3 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -513,6 +513,7 @@ bool tipc_msg_reverse(u32 own_node,  struct sk_buff **skb, int err)
 
 	/* Now reverse the concerned fields */
 	msg_set_errcode(hdr, err);
+	msg_set_non_seq(hdr, 0);
 	msg_set_origport(hdr, msg_destport(&ohdr));
 	msg_set_destport(hdr, msg_origport(&ohdr));
 	msg_set_destnode(hdr, msg_prevnode(&ohdr));
-- 
cgit v1.2.3-59-g8ed1b


From 539a06baedd06127389b254f6b9f016ca072da13 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Mon, 14 Aug 2017 18:04:24 +0200
Subject: tcp: ulp: avoid module refcnt leak in tcp_set_ulp

__tcp_ulp_find_autoload returns tcp_ulp_ops after taking a reference on
the module. Then, if ->init fails, tcp_set_ulp propagates the error but
nothing releases that reference.

Fixes: 734942cc4ea6 ("tcp: ULP infrastructure")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ulp.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c
index 2417f55374c5..6bb9e14c710a 100644
--- a/net/ipv4/tcp_ulp.c
+++ b/net/ipv4/tcp_ulp.c
@@ -122,14 +122,14 @@ int tcp_set_ulp(struct sock *sk, const char *name)
 
 	ulp_ops = __tcp_ulp_find_autoload(name);
 	if (!ulp_ops)
-		err = -ENOENT;
-	else
-		err = ulp_ops->init(sk);
+		return -ENOENT;
 
-	if (err)
-		goto out;
+	err = ulp_ops->init(sk);
+	if (err) {
+		module_put(ulp_ops->owner);
+		return err;
+	}
 
 	icsk->icsk_ulp_ops = ulp_ops;
- out:
-	return err;
+	return 0;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 36f41f8fc6d8aa9f8c9072d66ff7cf9055f5e69b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 14 Aug 2017 10:16:45 -0700
Subject: af_key: do not use GFP_KERNEL in atomic contexts

pfkey_broadcast() might be called from non process contexts,
we can not use GFP_KERNEL in these cases [1].

This patch partially reverts commit ba51b6be38c1 ("net: Fix RCU splat in
af_key"), only keeping the GFP_ATOMIC forcing under rcu_read_lock()
section.

[1] : syzkaller reported :

in_atomic(): 1, irqs_disabled(): 0, pid: 2932, name: syzkaller183439
3 locks held by syzkaller183439/2932:
 #0:  (&net->xfrm.xfrm_cfg_mutex){+.+.+.}, at: [<ffffffff83b43888>] pfkey_sendmsg+0x4c8/0x9f0 net/key/af_key.c:3649
 #1:  (&pfk->dump_lock){+.+.+.}, at: [<ffffffff83b467f6>] pfkey_do_dump+0x76/0x3f0 net/key/af_key.c:293
 #2:  (&(&net->xfrm.xfrm_policy_lock)->rlock){+...+.}, at: [<ffffffff83957632>] spin_lock_bh include/linux/spinlock.h:304 [inline]
 #2:  (&(&net->xfrm.xfrm_policy_lock)->rlock){+...+.}, at: [<ffffffff83957632>] xfrm_policy_walk+0x192/0xa30 net/xfrm/xfrm_policy.c:1028
CPU: 0 PID: 2932 Comm: syzkaller183439 Not tainted 4.13.0-rc4+ #24
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:16 [inline]
 dump_stack+0x194/0x257 lib/dump_stack.c:52
 ___might_sleep+0x2b2/0x470 kernel/sched/core.c:5994
 __might_sleep+0x95/0x190 kernel/sched/core.c:5947
 slab_pre_alloc_hook mm/slab.h:416 [inline]
 slab_alloc mm/slab.c:3383 [inline]
 kmem_cache_alloc+0x24b/0x6e0 mm/slab.c:3559
 skb_clone+0x1a0/0x400 net/core/skbuff.c:1037
 pfkey_broadcast_one+0x4b2/0x6f0 net/key/af_key.c:207
 pfkey_broadcast+0x4ba/0x770 net/key/af_key.c:281
 dump_sp+0x3d6/0x500 net/key/af_key.c:2685
 xfrm_policy_walk+0x2f1/0xa30 net/xfrm/xfrm_policy.c:1042
 pfkey_dump_sp+0x42/0x50 net/key/af_key.c:2695
 pfkey_do_dump+0xaa/0x3f0 net/key/af_key.c:299
 pfkey_spddump+0x1a0/0x210 net/key/af_key.c:2722
 pfkey_process+0x606/0x710 net/key/af_key.c:2814
 pfkey_sendmsg+0x4d6/0x9f0 net/key/af_key.c:3650
sock_sendmsg_nosec net/socket.c:633 [inline]
 sock_sendmsg+0xca/0x110 net/socket.c:643
 ___sys_sendmsg+0x755/0x890 net/socket.c:2035
 __sys_sendmsg+0xe5/0x210 net/socket.c:2069
 SYSC_sendmsg net/socket.c:2080 [inline]
 SyS_sendmsg+0x2d/0x50 net/socket.c:2076
 entry_SYSCALL_64_fastpath+0x1f/0xbe
RIP: 0033:0x445d79
RSP: 002b:00007f32447c1dc8 EFLAGS: 00000202 ORIG_RAX: 000000000000002e
RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 0000000000445d79
RDX: 0000000000000000 RSI: 000000002023dfc8 RDI: 0000000000000008
RBP: 0000000000000086 R08: 00007f32447c2700 R09: 00007f32447c2700
R10: 00007f32447c2700 R11: 0000000000000202 R12: 0000000000000000
R13: 00007ffe33edec4f R14: 00007f32447c29c0 R15: 0000000000000000

Fixes: ba51b6be38c1 ("net: Fix RCU splat in af_key")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Cc: David Ahern <dsa@cumulusnetworks.com>
Acked-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/key/af_key.c | 48 ++++++++++++++++++++++++++----------------------
 1 file changed, 26 insertions(+), 22 deletions(-)

(limited to 'net')

diff --git a/net/key/af_key.c b/net/key/af_key.c
index ca9d3ae665e7..98f4d8211b9a 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -228,7 +228,7 @@ static int pfkey_broadcast_one(struct sk_buff *skb, struct sk_buff **skb2,
 #define BROADCAST_ONE		1
 #define BROADCAST_REGISTERED	2
 #define BROADCAST_PROMISC_ONLY	4
-static int pfkey_broadcast(struct sk_buff *skb,
+static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation,
 			   int broadcast_flags, struct sock *one_sk,
 			   struct net *net)
 {
@@ -278,7 +278,7 @@ static int pfkey_broadcast(struct sk_buff *skb,
 	rcu_read_unlock();
 
 	if (one_sk != NULL)
-		err = pfkey_broadcast_one(skb, &skb2, GFP_KERNEL, one_sk);
+		err = pfkey_broadcast_one(skb, &skb2, allocation, one_sk);
 
 	kfree_skb(skb2);
 	kfree_skb(skb);
@@ -311,7 +311,7 @@ static int pfkey_do_dump(struct pfkey_sock *pfk)
 		hdr = (struct sadb_msg *) pfk->dump.skb->data;
 		hdr->sadb_msg_seq = 0;
 		hdr->sadb_msg_errno = rc;
-		pfkey_broadcast(pfk->dump.skb, BROADCAST_ONE,
+		pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE,
 				&pfk->sk, sock_net(&pfk->sk));
 		pfk->dump.skb = NULL;
 	}
@@ -355,7 +355,7 @@ static int pfkey_error(const struct sadb_msg *orig, int err, struct sock *sk)
 	hdr->sadb_msg_len = (sizeof(struct sadb_msg) /
 			     sizeof(uint64_t));
 
-	pfkey_broadcast(skb, BROADCAST_ONE, sk, sock_net(sk));
+	pfkey_broadcast(skb, GFP_KERNEL, BROADCAST_ONE, sk, sock_net(sk));
 
 	return 0;
 }
@@ -1389,7 +1389,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_
 
 	xfrm_state_put(x);
 
-	pfkey_broadcast(resp_skb, BROADCAST_ONE, sk, net);
+	pfkey_broadcast(resp_skb, GFP_KERNEL, BROADCAST_ONE, sk, net);
 
 	return 0;
 }
@@ -1476,7 +1476,7 @@ static int key_notify_sa(struct xfrm_state *x, const struct km_event *c)
 	hdr->sadb_msg_seq = c->seq;
 	hdr->sadb_msg_pid = c->portid;
 
-	pfkey_broadcast(skb, BROADCAST_ALL, NULL, xs_net(x));
+	pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xs_net(x));
 
 	return 0;
 }
@@ -1589,7 +1589,7 @@ static int pfkey_get(struct sock *sk, struct sk_buff *skb, const struct sadb_msg
 	out_hdr->sadb_msg_reserved = 0;
 	out_hdr->sadb_msg_seq = hdr->sadb_msg_seq;
 	out_hdr->sadb_msg_pid = hdr->sadb_msg_pid;
-	pfkey_broadcast(out_skb, BROADCAST_ONE, sk, sock_net(sk));
+	pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, sk, sock_net(sk));
 
 	return 0;
 }
@@ -1694,8 +1694,8 @@ static int pfkey_register(struct sock *sk, struct sk_buff *skb, const struct sad
 		return -ENOBUFS;
 	}
 
-	pfkey_broadcast(supp_skb, BROADCAST_REGISTERED, sk, sock_net(sk));
-
+	pfkey_broadcast(supp_skb, GFP_KERNEL, BROADCAST_REGISTERED, sk,
+			sock_net(sk));
 	return 0;
 }
 
@@ -1712,7 +1712,8 @@ static int unicast_flush_resp(struct sock *sk, const struct sadb_msg *ihdr)
 	hdr->sadb_msg_errno = (uint8_t) 0;
 	hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t));
 
-	return pfkey_broadcast(skb, BROADCAST_ONE, sk, sock_net(sk));
+	return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ONE, sk,
+			       sock_net(sk));
 }
 
 static int key_notify_sa_flush(const struct km_event *c)
@@ -1733,7 +1734,7 @@ static int key_notify_sa_flush(const struct km_event *c)
 	hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t));
 	hdr->sadb_msg_reserved = 0;
 
-	pfkey_broadcast(skb, BROADCAST_ALL, NULL, c->net);
+	pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net);
 
 	return 0;
 }
@@ -1790,7 +1791,7 @@ static int dump_sa(struct xfrm_state *x, int count, void *ptr)
 	out_hdr->sadb_msg_pid = pfk->dump.msg_portid;
 
 	if (pfk->dump.skb)
-		pfkey_broadcast(pfk->dump.skb, BROADCAST_ONE,
+		pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE,
 				&pfk->sk, sock_net(&pfk->sk));
 	pfk->dump.skb = out_skb;
 
@@ -1878,7 +1879,7 @@ static int pfkey_promisc(struct sock *sk, struct sk_buff *skb, const struct sadb
 		new_hdr->sadb_msg_errno = 0;
 	}
 
-	pfkey_broadcast(skb, BROADCAST_ALL, NULL, sock_net(sk));
+	pfkey_broadcast(skb, GFP_KERNEL, BROADCAST_ALL, NULL, sock_net(sk));
 	return 0;
 }
 
@@ -2206,7 +2207,7 @@ static int key_notify_policy(struct xfrm_policy *xp, int dir, const struct km_ev
 	out_hdr->sadb_msg_errno = 0;
 	out_hdr->sadb_msg_seq = c->seq;
 	out_hdr->sadb_msg_pid = c->portid;
-	pfkey_broadcast(out_skb, BROADCAST_ALL, NULL, xp_net(xp));
+	pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xp_net(xp));
 	return 0;
 
 }
@@ -2426,7 +2427,7 @@ static int key_pol_get_resp(struct sock *sk, struct xfrm_policy *xp, const struc
 	out_hdr->sadb_msg_errno = 0;
 	out_hdr->sadb_msg_seq = hdr->sadb_msg_seq;
 	out_hdr->sadb_msg_pid = hdr->sadb_msg_pid;
-	pfkey_broadcast(out_skb, BROADCAST_ONE, sk, xp_net(xp));
+	pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, sk, xp_net(xp));
 	err = 0;
 
 out:
@@ -2682,7 +2683,7 @@ static int dump_sp(struct xfrm_policy *xp, int dir, int count, void *ptr)
 	out_hdr->sadb_msg_pid = pfk->dump.msg_portid;
 
 	if (pfk->dump.skb)
-		pfkey_broadcast(pfk->dump.skb, BROADCAST_ONE,
+		pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE,
 				&pfk->sk, sock_net(&pfk->sk));
 	pfk->dump.skb = out_skb;
 
@@ -2739,7 +2740,7 @@ static int key_notify_policy_flush(const struct km_event *c)
 	hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC;
 	hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t));
 	hdr->sadb_msg_reserved = 0;
-	pfkey_broadcast(skb_out, BROADCAST_ALL, NULL, c->net);
+	pfkey_broadcast(skb_out, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net);
 	return 0;
 
 }
@@ -2803,7 +2804,7 @@ static int pfkey_process(struct sock *sk, struct sk_buff *skb, const struct sadb
 	void *ext_hdrs[SADB_EXT_MAX];
 	int err;
 
-	pfkey_broadcast(skb_clone(skb, GFP_KERNEL),
+	pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL,
 			BROADCAST_PROMISC_ONLY, NULL, sock_net(sk));
 
 	memset(ext_hdrs, 0, sizeof(ext_hdrs));
@@ -3024,7 +3025,8 @@ static int key_notify_sa_expire(struct xfrm_state *x, const struct km_event *c)
 	out_hdr->sadb_msg_seq = 0;
 	out_hdr->sadb_msg_pid = 0;
 
-	pfkey_broadcast(out_skb, BROADCAST_REGISTERED, NULL, xs_net(x));
+	pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL,
+			xs_net(x));
 	return 0;
 }
 
@@ -3212,7 +3214,8 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct
 		       xfrm_ctx->ctx_len);
 	}
 
-	return pfkey_broadcast(skb, BROADCAST_REGISTERED, NULL, xs_net(x));
+	return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL,
+			       xs_net(x));
 }
 
 static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt,
@@ -3408,7 +3411,8 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr,
 	n_port->sadb_x_nat_t_port_port = sport;
 	n_port->sadb_x_nat_t_port_reserved = 0;
 
-	return pfkey_broadcast(skb, BROADCAST_REGISTERED, NULL, xs_net(x));
+	return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL,
+			       xs_net(x));
 }
 
 #ifdef CONFIG_NET_KEY_MIGRATE
@@ -3599,7 +3603,7 @@ static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
 	}
 
 	/* broadcast migrate message to sockets */
-	pfkey_broadcast(skb, BROADCAST_ALL, NULL, &init_net);
+	pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, &init_net);
 
 	return 0;
 
-- 
cgit v1.2.3-59-g8ed1b


From e5645f51ba99738b0e5d708edf9c6454f33b9310 Mon Sep 17 00:00:00 2001
From: Wei Wang <weiwan@google.com>
Date: Mon, 14 Aug 2017 10:44:59 -0700
Subject: ipv6: release rt6->rt6i_idev properly during ifdown

When a dst is created by addrconf_dst_alloc() for a host route or an
anycast route, dst->dev points to loopback dev while rt6->rt6i_idev
points to a real device.
When the real device goes down, the current cleanup code only checks for
dst->dev and assumes rt6->rt6i_idev->dev is the same. This causes the
refcount leak on the real device in the above situation.
This patch makes sure to always release the refcount taken on
rt6->rt6i_idev during dst_dev_put().

Fixes: 587fea741134 ("ipv6: mark DST_NOGC and remove the operation of
dst_free()")
Reported-by: John Stultz <john.stultz@linaro.org>
Tested-by: John Stultz <john.stultz@linaro.org>
Tested-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Wei Wang <weiwan@google.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a640fbcba15d..99d4727f2b18 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -417,14 +417,11 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 	struct net_device *loopback_dev =
 		dev_net(dev)->loopback_dev;
 
-	if (dev != loopback_dev) {
-		if (idev && idev->dev == dev) {
-			struct inet6_dev *loopback_idev =
-				in6_dev_get(loopback_dev);
-			if (loopback_idev) {
-				rt->rt6i_idev = loopback_idev;
-				in6_dev_put(idev);
-			}
+	if (idev && idev->dev != loopback_dev) {
+		struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
+		if (loopback_idev) {
+			rt->rt6i_idev = loopback_idev;
+			in6_dev_put(idev);
 		}
 	}
 }
-- 
cgit v1.2.3-59-g8ed1b


From 7749d4ff88d31b0be17c8683143135adaaadc6a7 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 14 Aug 2017 14:10:25 -0700
Subject: dccp: purge write queue in dccp_destroy_sock()

syzkaller reported that DCCP could have a non empty
write queue at dismantle time.

WARNING: CPU: 1 PID: 2953 at net/core/stream.c:199 sk_stream_kill_queues+0x3ce/0x520 net/core/stream.c:199
Kernel panic - not syncing: panic_on_warn set ...

CPU: 1 PID: 2953 Comm: syz-executor0 Not tainted 4.13.0-rc4+ #2
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:16 [inline]
 dump_stack+0x194/0x257 lib/dump_stack.c:52
 panic+0x1e4/0x417 kernel/panic.c:180
 __warn+0x1c4/0x1d9 kernel/panic.c:541
 report_bug+0x211/0x2d0 lib/bug.c:183
 fixup_bug+0x40/0x90 arch/x86/kernel/traps.c:190
 do_trap_no_signal arch/x86/kernel/traps.c:224 [inline]
 do_trap+0x260/0x390 arch/x86/kernel/traps.c:273
 do_error_trap+0x120/0x390 arch/x86/kernel/traps.c:310
 do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:323
 invalid_op+0x1e/0x30 arch/x86/entry/entry_64.S:846
RIP: 0010:sk_stream_kill_queues+0x3ce/0x520 net/core/stream.c:199
RSP: 0018:ffff8801d182f108 EFLAGS: 00010297
RAX: ffff8801d1144140 RBX: ffff8801d13cb280 RCX: 0000000000000000
RDX: 0000000000000000 RSI: ffffffff85137b00 RDI: ffff8801d13cb280
RBP: ffff8801d182f148 R08: 0000000000000001 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: ffff8801d13cb4d0
R13: ffff8801d13cb3b8 R14: ffff8801d13cb300 R15: ffff8801d13cb3b8
 inet_csk_destroy_sock+0x175/0x3f0 net/ipv4/inet_connection_sock.c:835
 dccp_close+0x84d/0xc10 net/dccp/proto.c:1067
 inet_release+0xed/0x1c0 net/ipv4/af_inet.c:425
 sock_release+0x8d/0x1e0 net/socket.c:597
 sock_close+0x16/0x20 net/socket.c:1126
 __fput+0x327/0x7e0 fs/file_table.c:210
 ____fput+0x15/0x20 fs/file_table.c:246
 task_work_run+0x18a/0x260 kernel/task_work.c:116
 exit_task_work include/linux/task_work.h:21 [inline]
 do_exit+0xa32/0x1b10 kernel/exit.c:865
 do_group_exit+0x149/0x400 kernel/exit.c:969
 get_signal+0x7e8/0x17e0 kernel/signal.c:2330
 do_signal+0x94/0x1ee0 arch/x86/kernel/signal.c:808
 exit_to_usermode_loop+0x21c/0x2d0 arch/x86/entry/common.c:157
 prepare_exit_to_usermode arch/x86/entry/common.c:194 [inline]
 syscall_return_slowpath+0x3a7/0x450 arch/x86/entry/common.c:263

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/proto.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 9fe25bf63296..86bc40ba6ba5 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -201,10 +201,7 @@ void dccp_destroy_sock(struct sock *sk)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
 
-	/*
-	 * DCCP doesn't use sk_write_queue, just sk_send_head
-	 * for retransmissions
-	 */
+	__skb_queue_purge(&sk->sk_write_queue);
 	if (sk->sk_send_head != NULL) {
 		kfree_skb(sk->sk_send_head);
 		sk->sk_send_head = NULL;
-- 
cgit v1.2.3-59-g8ed1b


From d624d276d1ddacbcb12ad96832ce0c7b82cd25db Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 14 Aug 2017 17:44:43 -0700
Subject: tcp: fix possible deadlock in TCP stack vs BPF filter

Filtering the ACK packet was not put at the right place.

At this place, we already allocated a child and put it
into accept queue.

We absolutely need to call tcp_child_process() to release
its spinlock, or we will deadlock at accept() or close() time.

Found by syzkaller team (Thanks a lot !)

Fixes: 8fac365f63c8 ("tcp: Add a tcp_filter hook before handle ack packet")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Cc: Chenbo Feng <fengc@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c | 4 ++--
 net/ipv6/tcp_ipv6.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a20e7f03d5f7..e9252c7df809 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1722,6 +1722,8 @@ process:
 		 */
 		sock_hold(sk);
 		refcounted = true;
+		if (tcp_filter(sk, skb))
+			goto discard_and_relse;
 		nsk = tcp_check_req(sk, skb, req, false);
 		if (!nsk) {
 			reqsk_put(req);
@@ -1729,8 +1731,6 @@ process:
 		}
 		if (nsk == sk) {
 			reqsk_put(req);
-		} else if (tcp_filter(sk, skb)) {
-			goto discard_and_relse;
 		} else if (tcp_child_process(sk, nsk, skb)) {
 			tcp_v4_send_reset(nsk, skb);
 			goto discard_and_relse;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 2521690d62d6..206210125fd7 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1456,6 +1456,8 @@ process:
 		}
 		sock_hold(sk);
 		refcounted = true;
+		if (tcp_filter(sk, skb))
+			goto discard_and_relse;
 		nsk = tcp_check_req(sk, skb, req, false);
 		if (!nsk) {
 			reqsk_put(req);
@@ -1464,8 +1466,6 @@ process:
 		if (nsk == sk) {
 			reqsk_put(req);
 			tcp_v6_restore_cb(skb);
-		} else if (tcp_filter(sk, skb)) {
-			goto discard_and_relse;
 		} else if (tcp_child_process(sk, nsk, skb)) {
 			tcp_v6_send_reset(nsk, skb);
 			goto discard_and_relse;
-- 
cgit v1.2.3-59-g8ed1b


From 12d94a804946af291e24b80fc53ec86264765781 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 15 Aug 2017 04:09:51 -0700
Subject: ipv6: fix NULL dereference in ip6_route_dev_notify()

Based on a syzkaller report [1], I found that a per cpu allocation
failure in snmp6_alloc_dev() would then lead to NULL dereference in
ip6_route_dev_notify().

It seems this is a very old bug, thus no Fixes tag in this submission.

Let's add in6_dev_put_clear() helper, as we will probably use
it elsewhere (once available/present in net-next)

[1]
kasan: CONFIG_KASAN_INLINE enabled
kasan: GPF could be caused by NULL-ptr deref or user memory access
general protection fault: 0000 [#1] SMP KASAN
Dumping ftrace buffer:
   (ftrace buffer empty)
Modules linked in:
CPU: 1 PID: 17294 Comm: syz-executor6 Not tainted 4.13.0-rc2+ #10
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
task: ffff88019f456680 task.stack: ffff8801c6e58000
RIP: 0010:__read_once_size include/linux/compiler.h:250 [inline]
RIP: 0010:atomic_read arch/x86/include/asm/atomic.h:26 [inline]
RIP: 0010:refcount_sub_and_test+0x7d/0x1b0 lib/refcount.c:178
RSP: 0018:ffff8801c6e5f1b0 EFLAGS: 00010202
RAX: 0000000000000037 RBX: dffffc0000000000 RCX: ffffc90005d25000
RDX: ffff8801c6e5f218 RSI: ffffffff82342bbf RDI: 0000000000000001
RBP: ffff8801c6e5f240 R08: 0000000000000001 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: 1ffff10038dcbe37
R13: 0000000000000006 R14: 0000000000000001 R15: 00000000000001b8
FS:  00007f21e0429700(0000) GS:ffff8801dc100000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000001ddbc22000 CR3: 00000001d632b000 CR4: 00000000001426e0
DR0: 0000000020000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000600
Call Trace:
 refcount_dec_and_test+0x1a/0x20 lib/refcount.c:211
 in6_dev_put include/net/addrconf.h:335 [inline]
 ip6_route_dev_notify+0x1c9/0x4a0 net/ipv6/route.c:3732
 notifier_call_chain+0x136/0x2c0 kernel/notifier.c:93
 __raw_notifier_call_chain kernel/notifier.c:394 [inline]
 raw_notifier_call_chain+0x2d/0x40 kernel/notifier.c:401
 call_netdevice_notifiers_info+0x51/0x90 net/core/dev.c:1678
 call_netdevice_notifiers net/core/dev.c:1694 [inline]
 rollback_registered_many+0x91c/0xe80 net/core/dev.c:7107
 rollback_registered+0x1be/0x3c0 net/core/dev.c:7149
 register_netdevice+0xbcd/0xee0 net/core/dev.c:7587
 register_netdev+0x1a/0x30 net/core/dev.c:7669
 loopback_net_init+0x76/0x160 drivers/net/loopback.c:214
 ops_init+0x10a/0x570 net/core/net_namespace.c:118
 setup_net+0x313/0x710 net/core/net_namespace.c:294
 copy_net_ns+0x27c/0x580 net/core/net_namespace.c:418
 create_new_namespaces+0x425/0x880 kernel/nsproxy.c:107
 unshare_nsproxy_namespaces+0xae/0x1e0 kernel/nsproxy.c:206
 SYSC_unshare kernel/fork.c:2347 [inline]
 SyS_unshare+0x653/0xfa0 kernel/fork.c:2297
 entry_SYSCALL_64_fastpath+0x1f/0xbe
RIP: 0033:0x4512c9
RSP: 002b:00007f21e0428c08 EFLAGS: 00000216 ORIG_RAX: 0000000000000110
RAX: ffffffffffffffda RBX: 0000000000718150 RCX: 00000000004512c9
RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000062020200
RBP: 0000000000000086 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000216 R12: 00000000004b973d
R13: 00000000ffffffff R14: 000000002001d000 R15: 00000000000002dd
Code: 50 2b 34 82 c7 00 f1 f1 f1 f1 c7 40 04 04 f2 f2 f2 c7 40 08 f3 f3
f3 f3 e8 a1 43 39 ff 4c 89 f8 48 8b 95 70 ff ff ff 48 c1 e8 03 <0f> b6
0c 18 4c 89 f8 83 e0 07 83 c0 03 38 c8 7c 08 84 c9 0f 85
RIP: __read_once_size include/linux/compiler.h:250 [inline] RSP:
ffff8801c6e5f1b0
RIP: atomic_read arch/x86/include/asm/atomic.h:26 [inline] RSP:
ffff8801c6e5f1b0
RIP: refcount_sub_and_test+0x7d/0x1b0 lib/refcount.c:178 RSP:
ffff8801c6e5f1b0
---[ end trace e441d046c6410d31 ]---

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/addrconf.h | 10 ++++++++++
 net/ipv6/route.c       |  6 +++---
 2 files changed, 13 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 6df79e96a780..f44ff2476758 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -336,6 +336,16 @@ static inline void in6_dev_put(struct inet6_dev *idev)
 		in6_dev_finish_destroy(idev);
 }
 
+static inline void in6_dev_put_clear(struct inet6_dev **pidev)
+{
+	struct inet6_dev *idev = *pidev;
+
+	if (idev) {
+		in6_dev_put(idev);
+		*pidev = NULL;
+	}
+}
+
 static inline void __in6_dev_put(struct inet6_dev *idev)
 {
 	refcount_dec(&idev->refcnt);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 99d4727f2b18..94d6a13d47f0 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3721,10 +3721,10 @@ static int ip6_route_dev_notify(struct notifier_block *this,
 		/* NETDEV_UNREGISTER could be fired for multiple times by
 		 * netdev_wait_allrefs(). Make sure we only call this once.
 		 */
-		in6_dev_put(net->ipv6.ip6_null_entry->rt6i_idev);
+		in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
-		in6_dev_put(net->ipv6.ip6_prohibit_entry->rt6i_idev);
-		in6_dev_put(net->ipv6.ip6_blk_hole_entry->rt6i_idev);
+		in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
+		in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
 #endif
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 187e5b3ac84d3421d2de3aca949b2791fbcad554 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 15 Aug 2017 05:26:17 -0700
Subject: ipv4: fix NULL dereference in free_fib_info_rcu()

If fi->fib_metrics could not be allocated in fib_create_info()
we attempt to dereference a NULL pointer in free_fib_info_rcu() :

    m = fi->fib_metrics;
    if (m != &dst_default_metrics && atomic_dec_and_test(&m->refcnt))
            kfree(m);

Before my recent patch, we used to call kfree(NULL) and nothing wrong
happened.

Instead of using RCU to defer freeing while we are under memory stress,
it seems better to take immediate action.

This was reported by syzkaller team.

Fixes: 3fb07daff8e9 ("ipv4: add reference counting to metrics")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_semantics.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index b8d18171cca3..ec3a9ce281a6 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1083,15 +1083,17 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
 	fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
 	if (!fi)
 		goto failure;
-	fib_info_cnt++;
 	if (cfg->fc_mx) {
 		fi->fib_metrics = kzalloc(sizeof(*fi->fib_metrics), GFP_KERNEL);
-		if (!fi->fib_metrics)
-			goto failure;
+		if (unlikely(!fi->fib_metrics)) {
+			kfree(fi);
+			return ERR_PTR(err);
+		}
 		atomic_set(&fi->fib_metrics->refcnt, 1);
-	} else
+	} else {
 		fi->fib_metrics = (struct dst_metrics *)&dst_default_metrics;
-
+	}
+	fib_info_cnt++;
 	fi->fib_net = net;
 	fi->fib_protocol = cfg->fc_protocol;
 	fi->fib_scope = cfg->fc_scope;
-- 
cgit v1.2.3-59-g8ed1b


From 898904226b5a6dee657f23cf51e385f50da22596 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Date: Tue, 15 Aug 2017 16:35:21 +0300
Subject: net_sched: reset pointers to tcf blocks in classful qdiscs'
 destructors

Traffic filters could keep direct pointers to classes in classful qdisc,
thus qdisc destruction first removes all filters before freeing classes.
Class destruction methods also tries to free attached filters but now
this isn't safe because tcf_block_put() unlike to tcf_destroy_chain()
cannot be called second time.

This patch set class->block to NULL after first tcf_block_put() and
turn second call into no-op.

Fixes: 6529eaba33f0 ("net: sched: introduce tcf block infractructure")
Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_atm.c  | 4 +++-
 net/sched/sch_cbq.c  | 4 +++-
 net/sched/sch_hfsc.c | 4 +++-
 net/sched/sch_htb.c  | 4 +++-
 4 files changed, 12 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 572fe2584e48..c403c87aff7a 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -572,8 +572,10 @@ static void atm_tc_destroy(struct Qdisc *sch)
 	struct atm_flow_data *flow, *tmp;
 
 	pr_debug("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p);
-	list_for_each_entry(flow, &p->flows, list)
+	list_for_each_entry(flow, &p->flows, list) {
 		tcf_block_put(flow->block);
+		flow->block = NULL;
+	}
 
 	list_for_each_entry_safe(flow, tmp, &p->flows, list) {
 		if (flow->ref > 1)
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 481036f6b54e..780db43300b1 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1431,8 +1431,10 @@ static void cbq_destroy(struct Qdisc *sch)
 	 * be bound to classes which have been destroyed already. --TGR '04
 	 */
 	for (h = 0; h < q->clhash.hashsize; h++) {
-		hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode)
+		hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
 			tcf_block_put(cl->block);
+			cl->block = NULL;
+		}
 	}
 	for (h = 0; h < q->clhash.hashsize; h++) {
 		hlist_for_each_entry_safe(cl, next, &q->clhash.hash[h],
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 3ad02bbe6903..fd15200f8627 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1530,8 +1530,10 @@ hfsc_destroy_qdisc(struct Qdisc *sch)
 	unsigned int i;
 
 	for (i = 0; i < q->clhash.hashsize; i++) {
-		hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode)
+		hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode) {
 			tcf_block_put(cl->block);
+			cl->block = NULL;
+		}
 	}
 	for (i = 0; i < q->clhash.hashsize; i++) {
 		hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 203286ab4427..5d65ec5207e9 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1258,8 +1258,10 @@ static void htb_destroy(struct Qdisc *sch)
 	tcf_block_put(q->block);
 
 	for (i = 0; i < q->clhash.hashsize; i++) {
-		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode)
+		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
 			tcf_block_put(cl->block);
+			cl->block = NULL;
+		}
 	}
 	for (i = 0; i < q->clhash.hashsize; i++) {
 		hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
-- 
cgit v1.2.3-59-g8ed1b


From 325d5dc3f7e7c2840b65e4a2988c082c2c0025c5 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Date: Tue, 15 Aug 2017 16:37:04 +0300
Subject: net_sched/sfq: update hierarchical backlog when drop packet

When sfq_enqueue() drops head packet or packet from another queue it
have to update backlog at upper qdiscs too.

Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too")
Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_sfq.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index f80ea2cc5f1f..82469ef9655e 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -437,6 +437,7 @@ congestion_drop:
 		qdisc_drop(head, sch, to_free);
 
 		slot_queue_add(slot, skb);
+		qdisc_tree_reduce_backlog(sch, 0, delta);
 		return NET_XMIT_CN;
 	}
 
@@ -468,8 +469,10 @@ enqueue:
 	/* Return Congestion Notification only if we dropped a packet
 	 * from this flow.
 	 */
-	if (qlen != slot->qlen)
+	if (qlen != slot->qlen) {
+		qdisc_tree_reduce_backlog(sch, 0, dropped - qdisc_pkt_len(skb));
 		return NET_XMIT_CN;
+	}
 
 	/* As we dropped a packet, better let upper stack know this */
 	qdisc_tree_reduce_backlog(sch, 1, dropped);
-- 
cgit v1.2.3-59-g8ed1b


From c90e95147c27b1780e76c6e8fea1b5c78d7d387f Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Date: Tue, 15 Aug 2017 16:39:05 +0300
Subject: net_sched: remove warning from qdisc_hash_add

It was added in commit e57a784d8cae ("pkt_sched: set root qdisc
before change() in attach_default_qdiscs()") to hide duplicates
from "tc qdisc show" for incative deivices.

After 59cc1f61f ("net: sched: convert qdisc linked list to hashtable")
it triggered when classful qdisc is added to inactive device because
default qdiscs are added before switching root qdisc.

Anyway after commit ea3274695353 ("net: sched: avoid duplicates in
qdisc dump") duplicates are filtered right in dumper.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_api.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index bd24a550e0f9..a3fa144b8648 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -286,9 +286,6 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
 void qdisc_hash_add(struct Qdisc *q, bool invisible)
 {
 	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
-		struct Qdisc *root = qdisc_dev(q)->qdisc;
-
-		WARN_ON_ONCE(root == &noop_qdisc);
 		ASSERT_RTNL();
 		hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
 		if (invisible)
-- 
cgit v1.2.3-59-g8ed1b