From 4fabb59449aa44a585b3603ffdadd4c5f4d0c033 Mon Sep 17 00:00:00 2001
From: Wengang Wang <wen.gang.wang@oracle.com>
Date: Mon, 6 Jul 2015 14:35:11 +0800
Subject: rds: rds_ib_device.refcount overflow

Fixes: 3e0249f9c05c ("RDS/IB: add refcount tracking to struct rds_ib_device")

There lacks a dropping on rds_ib_device.refcount in case rds_ib_alloc_fmr
failed(mr pool running out). this lead to the refcount overflow.

A complain in line 117(see following) is seen. From vmcore:
s_ib_rdma_mr_pool_depleted is 2147485544 and rds_ibdev->refcount is -2147475448.
That is the evidence the mr pool is used up. so rds_ib_alloc_fmr is very likely
to return ERR_PTR(-EAGAIN).

115 void rds_ib_dev_put(struct rds_ib_device *rds_ibdev)
116 {
117         BUG_ON(atomic_read(&rds_ibdev->refcount) <= 0);
118         if (atomic_dec_and_test(&rds_ibdev->refcount))
119                 queue_work(rds_wq, &rds_ibdev->free_work);
120 }

fix is to drop refcount when rds_ib_alloc_fmr failed.

Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>
Reviewed-by: Haggai Eran <haggaie@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 net/rds/ib_rdma.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 273b8bff6ba4..657ba9f5d308 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -759,8 +759,10 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
 	}
 
 	ibmr = rds_ib_alloc_fmr(rds_ibdev);
-	if (IS_ERR(ibmr))
+	if (IS_ERR(ibmr)) {
+		rds_ib_dev_put(rds_ibdev);
 		return ibmr;
+	}
 
 	ret = rds_ib_map_fmr(rds_ibdev, ibmr, sg, nents);
 	if (ret == 0)
-- 
cgit v1.2.3-59-g8ed1b


From 035d210f928ce083435b4fd351a26d126c02c927 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Mon, 13 Jul 2015 00:06:02 +0200
Subject: rtnetlink: reject non-IFLA_VF_PORT attributes inside IFLA_VF_PORTS

Similarly as in commit 4f7d2cdfdde7 ("rtnetlink: verify IFLA_VF_INFO
attributes before passing them to driver"), we have a double nesting
of netlink attributes, i.e. IFLA_VF_PORTS only contains IFLA_VF_PORT
that is nested itself. While IFLA_VF_PORTS is a verified attribute
from ifla_policy[], we only check if the IFLA_VF_PORTS container has
IFLA_VF_PORT attributes and then pass the attribute's content itself
via nla_parse_nested(). It would be more correct to reject inner types
other than IFLA_VF_PORT instead of continuing parsing and also similarly
as in commit 4f7d2cdfdde7, to check for a minimum of NLA_HDRLEN.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Roopa Prabhu <roopa@cumulusnetworks.com>
Cc: Scott Feldman <sfeldma@gmail.com>
Cc: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Acked-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 9e433d58d265..dc004b1e1f85 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1804,10 +1804,13 @@ static int do_setlink(const struct sk_buff *skb,
 			goto errout;
 
 		nla_for_each_nested(attr, tb[IFLA_VF_PORTS], rem) {
-			if (nla_type(attr) != IFLA_VF_PORT)
-				continue;
-			err = nla_parse_nested(port, IFLA_PORT_MAX,
-				attr, ifla_port_policy);
+			if (nla_type(attr) != IFLA_VF_PORT ||
+			    nla_len(attr) < NLA_HDRLEN) {
+				err = -EINVAL;
+				goto errout;
+			}
+			err = nla_parse_nested(port, IFLA_PORT_MAX, attr,
+					       ifla_port_policy);
 			if (err < 0)
 				goto errout;
 			if (!port[IFLA_PORT_VF]) {
-- 
cgit v1.2.3-59-g8ed1b


From 738ac1ebb96d02e0d23bc320302a6ea94c612dec Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 13 Jul 2015 16:04:13 +0800
Subject: net: Clone skb before setting peeked flag

Shared skbs must not be modified and this is crucial for broadcast
and/or multicast paths where we use it as an optimisation to avoid
unnecessary cloning.

The function skb_recv_datagram breaks this rule by setting peeked
without cloning the skb first.  This causes funky races which leads
to double-free.

This patch fixes this by cloning the skb and replacing the skb
in the list when setting skb->peeked.

Fixes: a59322be07c9 ("[UDP]: Only increment counter on first peek/recv")
Reported-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/datagram.c | 41 ++++++++++++++++++++++++++++++++++++++---
 1 file changed, 38 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/core/datagram.c b/net/core/datagram.c
index b80fb91bb3f7..4e9a3f690b7e 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -131,6 +131,35 @@ out_noerr:
 	goto out;
 }
 
+static int skb_set_peeked(struct sk_buff *skb)
+{
+	struct sk_buff *nskb;
+
+	if (skb->peeked)
+		return 0;
+
+	/* We have to unshare an skb before modifying it. */
+	if (!skb_shared(skb))
+		goto done;
+
+	nskb = skb_clone(skb, GFP_ATOMIC);
+	if (!nskb)
+		return -ENOMEM;
+
+	skb->prev->next = nskb;
+	skb->next->prev = nskb;
+	nskb->prev = skb->prev;
+	nskb->next = skb->next;
+
+	consume_skb(skb);
+	skb = nskb;
+
+done:
+	skb->peeked = 1;
+
+	return 0;
+}
+
 /**
  *	__skb_recv_datagram - Receive a datagram skbuff
  *	@sk: socket
@@ -165,7 +194,9 @@ out_noerr:
 struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
 				    int *peeked, int *off, int *err)
 {
+	struct sk_buff_head *queue = &sk->sk_receive_queue;
 	struct sk_buff *skb, *last;
+	unsigned long cpu_flags;
 	long timeo;
 	/*
 	 * Caller is allowed not to check sk->sk_err before skb_recv_datagram()
@@ -184,8 +215,6 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
 		 * Look at current nfs client by the way...
 		 * However, this function was correct in any case. 8)
 		 */
-		unsigned long cpu_flags;
-		struct sk_buff_head *queue = &sk->sk_receive_queue;
 		int _off = *off;
 
 		last = (struct sk_buff *)queue;
@@ -199,7 +228,11 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
 					_off -= skb->len;
 					continue;
 				}
-				skb->peeked = 1;
+
+				error = skb_set_peeked(skb);
+				if (error)
+					goto unlock_err;
+
 				atomic_inc(&skb->users);
 			} else
 				__skb_unlink(skb, queue);
@@ -223,6 +256,8 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
 
 	return NULL;
 
+unlock_err:
+	spin_unlock_irqrestore(&queue->lock, cpu_flags);
 no_packet:
 	*err = error;
 	return NULL;
-- 
cgit v1.2.3-59-g8ed1b


From da278622bf04f8ddb14519a2b8214e108ef26101 Mon Sep 17 00:00:00 2001
From: Richard Stearn <richard@rns-stearn.demon.co.uk>
Date: Mon, 13 Jul 2015 11:38:24 +0200
Subject: NET: AX.25: Stop heartbeat timer on disconnect.

This may result in a kernel panic.  The bug has always existed but
somehow we've run out of luck now and it bites.

Signed-off-by: Richard Stearn <richard@rns-stearn.demon.co.uk>
Cc: stable@vger.kernel.org	# all branches
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ax25/ax25_subr.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c
index 1997538a5d23..3b78e8473a01 100644
--- a/net/ax25/ax25_subr.c
+++ b/net/ax25/ax25_subr.c
@@ -264,6 +264,7 @@ void ax25_disconnect(ax25_cb *ax25, int reason)
 {
 	ax25_clear_queues(ax25);
 
+	ax25_stop_heartbeat(ax25);
 	ax25_stop_t1timer(ax25);
 	ax25_stop_t2timer(ax25);
 	ax25_stop_t3timer(ax25);
-- 
cgit v1.2.3-59-g8ed1b


From 89c22d8c3b278212eef6a8cc66b570bc840a6f5a Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 13 Jul 2015 20:01:42 +0800
Subject: net: Fix skb csum races when peeking

When we calculate the checksum on the recv path, we store the
result in the skb as an optimisation in case we need the checksum
again down the line.

This is in fact bogus for the MSG_PEEK case as this is done without
any locking.  So multiple threads can peek and then store the result
to the same skb, potentially resulting in bogus skb states.

This patch fixes this by only storing the result if the skb is not
shared.  This preserves the optimisations for the few cases where
it can be done safely due to locking or other reasons, e.g., SIOCINQ.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/datagram.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/core/datagram.c b/net/core/datagram.c
index 4e9a3f690b7e..4967262b2707 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -657,7 +657,8 @@ __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
 		    !skb->csum_complete_sw)
 			netdev_rx_csum_fault(skb->dev);
 	}
-	skb->csum_valid = !sum;
+	if (!skb_shared(skb))
+		skb->csum_valid = !sum;
 	return sum;
 }
 EXPORT_SYMBOL(__skb_checksum_complete_head);
@@ -677,11 +678,13 @@ __sum16 __skb_checksum_complete(struct sk_buff *skb)
 			netdev_rx_csum_fault(skb->dev);
 	}
 
-	/* Save full packet checksum */
-	skb->csum = csum;
-	skb->ip_summed = CHECKSUM_COMPLETE;
-	skb->csum_complete_sw = 1;
-	skb->csum_valid = !sum;
+	if (!skb_shared(skb)) {
+		/* Save full packet checksum */
+		skb->csum = csum;
+		skb->ip_summed = CHECKSUM_COMPLETE;
+		skb->csum_complete_sw = 1;
+		skb->csum_valid = !sum;
+	}
 
 	return sum;
 }
-- 
cgit v1.2.3-59-g8ed1b


From bc8c20acaea154efc558f5f4122ed65d396f6156 Mon Sep 17 00:00:00 2001
From: Satish Ashok <sashok@cumulusnetworks.com>
Date: Mon, 13 Jul 2015 05:28:37 -0700
Subject: bridge: multicast: treat igmpv3 report with INCLUDE and no sources as
 a leave

A report with INCLUDE/Change_to_include and empty source list should be
treated as a leave, specified by RFC 3376, section 3.1:
"If the requested filter mode is INCLUDE *and* the requested source
 list is empty, then the entry corresponding to the requested
 interface and multicast address is deleted if present.  If no such
 entry is present, the request is ignored."

Signed-off-by: Satish Ashok <sashok@cumulusnetworks.com>
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_multicast.c | 37 ++++++++++++++++++++++++++++++-------
 1 file changed, 30 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 742a6c27d7a2..79db489cdade 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -39,6 +39,16 @@ static void br_multicast_start_querier(struct net_bridge *br,
 				       struct bridge_mcast_own_query *query);
 static void br_multicast_add_router(struct net_bridge *br,
 				    struct net_bridge_port *port);
+static void br_ip4_multicast_leave_group(struct net_bridge *br,
+					 struct net_bridge_port *port,
+					 __be32 group,
+					 __u16 vid);
+#if IS_ENABLED(CONFIG_IPV6)
+static void br_ip6_multicast_leave_group(struct net_bridge *br,
+					 struct net_bridge_port *port,
+					 const struct in6_addr *group,
+					 __u16 vid);
+#endif
 unsigned int br_mdb_rehash_seq;
 
 static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b)
@@ -1010,9 +1020,15 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br,
 			continue;
 		}
 
-		err = br_ip4_multicast_add_group(br, port, group, vid);
-		if (err)
-			break;
+		if ((type == IGMPV3_CHANGE_TO_INCLUDE ||
+		     type == IGMPV3_MODE_IS_INCLUDE) &&
+		    ntohs(grec->grec_nsrcs) == 0) {
+			br_ip4_multicast_leave_group(br, port, group, vid);
+		} else {
+			err = br_ip4_multicast_add_group(br, port, group, vid);
+			if (err)
+				break;
+		}
 	}
 
 	return err;
@@ -1071,10 +1087,17 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
 			continue;
 		}
 
-		err = br_ip6_multicast_add_group(br, port, &grec->grec_mca,
-						 vid);
-		if (err)
-			break;
+		if ((grec->grec_type == MLD2_CHANGE_TO_INCLUDE ||
+		     grec->grec_type == MLD2_MODE_IS_INCLUDE) &&
+		    ntohs(*nsrcs) == 0) {
+			br_ip6_multicast_leave_group(br, port, &grec->grec_mca,
+						     vid);
+		} else {
+			err = br_ip6_multicast_add_group(br, port,
+							 &grec->grec_mca, vid);
+			if (!err)
+				break;
+		}
 	}
 
 	return err;
-- 
cgit v1.2.3-59-g8ed1b


From 5ebc784625ea68a9570d1f70557e7932988cd1b4 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Date: Mon, 13 Jul 2015 06:36:19 -0700
Subject: bridge: mdb: fix double add notification

Since the mdb add/del code was introduced there have been 2 br_mdb_notify
calls when doing br_mdb_add() resulting in 2 notifications on each add.

Example:
 Command: bridge mdb add dev br0 port eth1 grp 239.0.0.1 permanent
 Before patch:
 root@debian:~# bridge monitor all
 [MDB]dev br0 port eth1 grp 239.0.0.1 permanent
 [MDB]dev br0 port eth1 grp 239.0.0.1 permanent

 After patch:
 root@debian:~# bridge monitor all
 [MDB]dev br0 port eth1 grp 239.0.0.1 permanent

Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Fixes: cfd567543590 ("bridge: add support of adding and deleting mdb entries")
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_mdb.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index c11cf2611db0..1198a3dbad95 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -351,7 +351,6 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
 	if (state == MDB_TEMPORARY)
 		mod_timer(&p->timer, now + br->multicast_membership_interval);
 
-	br_mdb_notify(br->dev, port, group, RTM_NEWMDB);
 	return 0;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From f82b681a511f4d61069e9586a9cf97bdef371ef3 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Mon, 13 Jul 2015 12:10:20 -0700
Subject: tcp: don't use F-RTO on non-recurring timeouts

Currently F-RTO may repeatedly send new data packets on non-recurring
timeouts in CA_Loss mode. This is a bug because F-RTO (RFC5682)
should only be used on either new recovery or recurring timeouts.

This exacerbates the recovery progress during frequent timeout &
repair, because we prioritize sending new data packets instead of
repairing the holes when the bandwidth is already scarce.

Fix it by correcting the test of a new recovery episode.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 684f095d196e..728f5b3d3c64 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1917,14 +1917,13 @@ void tcp_enter_loss(struct sock *sk)
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
-	bool new_recovery = false;
+	bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery;
 	bool is_reneg;			/* is receiver reneging on SACKs? */
 
 	/* Reduce ssthresh if it has not yet been made inside this window. */
 	if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
 	    !after(tp->high_seq, tp->snd_una) ||
 	    (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
-		new_recovery = true;
 		tp->prior_ssthresh = tcp_current_ssthresh(sk);
 		tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
 		tcp_ca_event(sk, CA_EVENT_LOSS);
-- 
cgit v1.2.3-59-g8ed1b


From 052cbda41fdc243a8d40cce7ab3a6327b4b2887e Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Mon, 13 Jul 2015 12:30:07 -0700
Subject: fq_codel: fix a use-after-free

Fixes: 25331d6ce42b ("net: sched: implement qstat helper routines")
Cc: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Cong Wang <cwang@twopensource.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_fq_codel.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index d75993f89fac..06e7c845e24d 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -155,10 +155,10 @@ static unsigned int fq_codel_drop(struct Qdisc *sch)
 	skb = dequeue_head(flow);
 	len = qdisc_pkt_len(skb);
 	q->backlogs[idx] -= len;
-	kfree_skb(skb);
 	sch->q.qlen--;
 	qdisc_qstats_drop(sch);
 	qdisc_qstats_backlog_dec(sch, skb);
+	kfree_skb(skb);
 	flow->dropped++;
 	return idx;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 03645a11a570d52e70631838cb786eb4253eb463 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 14 Jul 2015 08:10:22 +0200
Subject: ipv6: lock socket in ip6_datagram_connect()

ip6_datagram_connect() is doing a lot of socket changes without
socket being locked.

This looks wrong, at least for udp_lib_rehash() which could corrupt
lists because of concurrent udp_sk(sk)->udp_portaddr_hash accesses.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h    |  1 +
 net/ipv4/datagram.c | 16 ++++++++++++----
 net/ipv6/datagram.c | 20 +++++++++++++++-----
 3 files changed, 28 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/include/net/ip.h b/include/net/ip.h
index 0750a186ea63..d5fe9f2ab699 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -161,6 +161,7 @@ static inline __u8 get_rtconn_flags(struct ipcm_cookie* ipc, struct sock* sk)
 }
 
 /* datagram.c */
+int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
 int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
 
 void ip4_datagram_release_cb(struct sock *sk);
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 90c0e8386116..574fad9cca05 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -20,7 +20,7 @@
 #include <net/route.h>
 #include <net/tcp_states.h>
 
-int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	struct sockaddr_in *usin = (struct sockaddr_in *) uaddr;
@@ -39,8 +39,6 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 
 	sk_dst_reset(sk);
 
-	lock_sock(sk);
-
 	oif = sk->sk_bound_dev_if;
 	saddr = inet->inet_saddr;
 	if (ipv4_is_multicast(usin->sin_addr.s_addr)) {
@@ -82,9 +80,19 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	sk_dst_set(sk, &rt->dst);
 	err = 0;
 out:
-	release_sock(sk);
 	return err;
 }
+EXPORT_SYMBOL(__ip4_datagram_connect);
+
+int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+{
+	int res;
+
+	lock_sock(sk);
+	res = __ip4_datagram_connect(sk, uaddr, addr_len);
+	release_sock(sk);
+	return res;
+}
 EXPORT_SYMBOL(ip4_datagram_connect);
 
 /* Because UDP xmit path can manipulate sk_dst_cache without holding
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 62d908e64eeb..b10a88986a98 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -40,7 +40,7 @@ static bool ipv6_mapped_addr_any(const struct in6_addr *a)
 	return ipv6_addr_v4mapped(a) && (a->s6_addr32[3] == 0);
 }
 
-int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+static int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 {
 	struct sockaddr_in6	*usin = (struct sockaddr_in6 *) uaddr;
 	struct inet_sock	*inet = inet_sk(sk);
@@ -56,7 +56,7 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	if (usin->sin6_family == AF_INET) {
 		if (__ipv6_only_sock(sk))
 			return -EAFNOSUPPORT;
-		err = ip4_datagram_connect(sk, uaddr, addr_len);
+		err = __ip4_datagram_connect(sk, uaddr, addr_len);
 		goto ipv4_connected;
 	}
 
@@ -98,9 +98,9 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 		sin.sin_addr.s_addr = daddr->s6_addr32[3];
 		sin.sin_port = usin->sin6_port;
 
-		err = ip4_datagram_connect(sk,
-					   (struct sockaddr *) &sin,
-					   sizeof(sin));
+		err = __ip4_datagram_connect(sk,
+					     (struct sockaddr *) &sin,
+					     sizeof(sin));
 
 ipv4_connected:
 		if (err)
@@ -204,6 +204,16 @@ out:
 	fl6_sock_release(flowlabel);
 	return err;
 }
+
+int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+{
+	int res;
+
+	lock_sock(sk);
+	res = __ip6_datagram_connect(sk, uaddr, addr_len);
+	release_sock(sk);
+	return res;
+}
 EXPORT_SYMBOL_GPL(ip6_datagram_connect);
 
 int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr *uaddr,
-- 
cgit v1.2.3-59-g8ed1b


From e8d092aafd9e68c04d7b468e95ff7a617998a796 Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Tue, 14 Jul 2015 11:21:57 -0700
Subject: net_sched: fix a use-after-free in sfq

Fixes: 25331d6ce42b ("net: sched: implement qstat helper routines")
Cc: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Cong Wang <cwang@twopensource.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_sfq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 7d1492663360..52f75a5473e1 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -306,10 +306,10 @@ drop:
 		len = qdisc_pkt_len(skb);
 		slot->backlog -= len;
 		sfq_dec(q, x);
-		kfree_skb(skb);
 		sch->q.qlen--;
 		qdisc_qstats_drop(sch);
 		qdisc_qstats_backlog_dec(sch, skb);
+		kfree_skb(skb);
 		return len;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From c0afd9ce4d6a646fb6433536f95a418bb348fab1 Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Tue, 14 Jul 2015 11:21:58 -0700
Subject: fq_codel: fix return value of fq_codel_drop()

The ->drop() is supposed to return the number of bytes it dropped,
however fq_codel_drop() returns the index of the flow where it drops
a packet from.

Fix this by introducing a helper to wrap fq_codel_drop().

Cc: Eric Dumazet <edumazet@google.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Cong Wang <cwang@twopensource.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_fq_codel.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 06e7c845e24d..21ca33c9f036 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -163,6 +163,15 @@ static unsigned int fq_codel_drop(struct Qdisc *sch)
 	return idx;
 }
 
+static unsigned int fq_codel_qdisc_drop(struct Qdisc *sch)
+{
+	unsigned int prev_backlog;
+
+	prev_backlog = sch->qstats.backlog;
+	fq_codel_drop(sch);
+	return prev_backlog - sch->qstats.backlog;
+}
+
 static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
 	struct fq_codel_sched_data *q = qdisc_priv(sch);
@@ -604,7 +613,7 @@ static struct Qdisc_ops fq_codel_qdisc_ops __read_mostly = {
 	.enqueue	=	fq_codel_enqueue,
 	.dequeue	=	fq_codel_dequeue,
 	.peek		=	qdisc_peek_dequeued,
-	.drop		=	fq_codel_drop,
+	.drop		=	fq_codel_qdisc_drop,
 	.init		=	fq_codel_init,
 	.reset		=	fq_codel_reset,
 	.destroy	=	fq_codel_destroy,
-- 
cgit v1.2.3-59-g8ed1b


From ddf06c1e569a64a44c4c750ae45b2604f19e45f0 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Tue, 14 Jul 2015 12:15:19 -0700
Subject: tc: act_bpf: fix memory leak

prog->bpf_ops is populated when act_bpf is used with classic BPF and
prog->bpf_name is optionally used with extended BPF.
Fix memory leak when act_bpf is released.

Fixes: d23b8ad8ab23 ("tc: add BPF based action")
Fixes: a8cb5f556b56 ("act_bpf: add initial eBPF support for actions")
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_bpf.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 1d56903fd4c7..1df78289e248 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -339,6 +339,9 @@ static void tcf_bpf_cleanup(struct tc_action *act, int bind)
 		bpf_prog_put(prog->filter);
 	else
 		bpf_prog_destroy(prog->filter);
+
+	kfree(prog->bpf_ops);
+	kfree(prog->bpf_name);
 }
 
 static struct tc_action_ops act_bpf_ops __read_mostly = {
-- 
cgit v1.2.3-59-g8ed1b


From 4479004e6409087d1b4986881dc98c6c15dffb28 Mon Sep 17 00:00:00 2001
From: Tom Hughes <tom@compton.nu>
Date: Mon, 29 Jun 2015 19:41:49 +0100
Subject: mac80211: clear subdir_stations when removing debugfs

If we don't do this, and we then fail to recreate the debugfs
directory during a mode change, then we will fail later trying
to add stations to this now bogus directory:

BUG: unable to handle kernel NULL pointer dereference at 0000006c
IP: [<c0a92202>] mutex_lock+0x12/0x30
Call Trace:
[<c0678ab4>] start_creating+0x44/0xc0
[<c0679203>] debugfs_create_dir+0x13/0xf0
[<f8a938ae>] ieee80211_sta_debugfs_add+0x6e/0x490 [mac80211]

Cc: stable@kernel.org
Signed-off-by: Tom Hughes <tom@compton.nu>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/debugfs_netdev.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 29236e832e44..c09c0131bfa2 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -723,6 +723,7 @@ void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata)
 
 	debugfs_remove_recursive(sdata->vif.debugfs_dir);
 	sdata->vif.debugfs_dir = NULL;
+	sdata->debugfs.subdir_stations = NULL;
 }
 
 void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata)
-- 
cgit v1.2.3-59-g8ed1b


From e9de01907e3d1957591113daa3857c0bf01067ef Mon Sep 17 00:00:00 2001
From: Michal Kazior <michal.kazior@tieto.com>
Date: Thu, 2 Jul 2015 09:59:56 +0200
Subject: mac80211: don't clear all tx flags when requeing

When acting as AP and a PS-Poll frame is received
associated station is marked as one in a Service
Period. This state is kept until Tx status for
released frame is reported. While a station is in
Service Period PS-Poll frames are ignored.

However if PS-Poll was received during A-MPDU
teardown it was possible to have the to-be
released frame re-queued back to pending queue.
In such case the frame was stripped of 2 important
flags:

 (a) IEEE80211_TX_CTL_NO_PS_BUFFER
 (b) IEEE80211_TX_STATUS_EOSP

Stripping of (a) led to the frame that was to be
released to be queued back to ps_tx_buf queue. If
station remained to use only PS-Poll frames the
re-queued frame (and new ones) was never actually
transmitted because mac80211 would ignore
subsequent PS-Poll frames due to station being in
Service Period. There was nothing left to clear
the Service Period bit (no xmit -> no tx status ->
no SP end), i.e. the AP would have the station
stuck in Service Period. Beacon TIM would
repeatedly prompt station to poll for frames but
it would get none.

Once (a) is not stripped (b) becomes important
because it's the main condition to clear the
Service Period bit of the station when Tx status
for the released frame is reported back.

This problem was observed with ath9k acting as P2P
GO in some testing scenarios but isn't limited to
it. AP operation with mac80211 based Tx A-MPDU
control combined with clients using PS-Poll frames
is subject to this race.

Signed-off-by: Michal Kazior <michal.kazior@tieto.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/tx.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 8410bb3bf5e8..b8233505bf9f 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1117,7 +1117,9 @@ static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx,
 			queued = true;
 			info->control.vif = &tx->sdata->vif;
 			info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING;
-			info->flags &= ~IEEE80211_TX_TEMPORARY_FLAGS;
+			info->flags &= ~IEEE80211_TX_TEMPORARY_FLAGS |
+					IEEE80211_TX_CTL_NO_PS_BUFFER |
+					IEEE80211_TX_STATUS_EOSP;
 			__skb_queue_tail(&tid_tx->pending, skb);
 			if (skb_queue_len(&tid_tx->pending) > STA_MAX_TX_BUFFER)
 				purge_skb = __skb_dequeue(&tid_tx->pending);
-- 
cgit v1.2.3-59-g8ed1b


From 541b6ed7cee1ec7c8f525f51a0ff097776111aeb Mon Sep 17 00:00:00 2001
From: Chaitanya T K <chaitanya.mgit@gmail.com>
Date: Wed, 10 Jun 2015 19:12:31 +0530
Subject: mac80211: wowlan: enable powersave if suspend while ps-polling

If for any reason we're in the middle of PS-polling or awake after
TX due to dynamic powersave while going to suspend, go back to save
power. This might cause a response frame to get lost, but since we
can't really wait for it while going to suspend that's still better
than not enabling powersave which would cause higher power usage
during (and possibly even after) suspend.

Note that this really only affects the very few drivers that use
the powersave implementation in mac80211.

Signed-off-by: Chaitanya T K <chaitanya.mgit@gmail.com>
[rewrite misleading commit log]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/pm.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c
index 06b60980c62c..b676b9fa707b 100644
--- a/net/mac80211/pm.c
+++ b/net/mac80211/pm.c
@@ -76,6 +76,22 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
 			if (sdata->vif.type != NL80211_IFTYPE_STATION)
 				continue;
 			ieee80211_mgd_quiesce(sdata);
+			/* If suspended during TX in progress, and wowlan
+			 * is enabled (connection will be active) there
+			 * can be a race where the driver is put out
+			 * of power-save due to TX and during suspend
+			 * dynamic_ps_timer is cancelled and TX packet
+			 * is flushed, leaving the driver in ACTIVE even
+			 * after resuming until dynamic_ps_timer puts
+			 * driver back in DOZE.
+			 */
+			if (sdata->u.mgd.associated &&
+			    sdata->u.mgd.powersave &&
+			     !(local->hw.conf.flags & IEEE80211_CONF_PS)) {
+				local->hw.conf.flags |= IEEE80211_CONF_PS;
+				ieee80211_hw_config(local,
+						    IEEE80211_CONF_CHANGE_PS);
+			}
 		}
 
 		err = drv_suspend(local, wowlan);
-- 
cgit v1.2.3-59-g8ed1b


From d8d9008cfb919db48d95f96b05e81f84b3774318 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 8 Jul 2015 15:41:50 +0300
Subject: mac80211: shut down interfaces before destroying interface list

If the hardware is unregistered while interfaces are up, mac80211 will
unregister all interfaces, which in turns causes mac80211 to be called
again to remove them all from the driver and eventually shut down the
hardware.

During this shutdown, however, it's currently already unsafe to iterate
the list of interfaces atomically, as the list is manipulated in an
unsafe manner. This puts an undue burden on the driver - it must stop
all its activities before calling ieee80211_unregister_hw(), while in
the normal stop path it can do all cleanup in the stop method. If, for
example, it's using the iteration during RX for some reason, it would
have to stop RX before unregistering to avoid crashes.

Fix this problem by closing all interfaces before unregistering them.
This will cause the driver stop to have completed before we manipulate
the interface list, and after the driver is stopped *and* has called
ieee80211_unregister_hw() it really musn't be iterating any more as
the memory will be freed as well.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/iface.c | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index ed1edac14372..553ac6dd4867 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1863,10 +1863,6 @@ void ieee80211_sdata_stop(struct ieee80211_sub_if_data *sdata)
 	ieee80211_teardown_sdata(sdata);
 }
 
-/*
- * Remove all interfaces, may only be called at hardware unregistration
- * time because it doesn't do RCU-safe list removals.
- */
 void ieee80211_remove_interfaces(struct ieee80211_local *local)
 {
 	struct ieee80211_sub_if_data *sdata, *tmp;
@@ -1875,14 +1871,21 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local)
 
 	ASSERT_RTNL();
 
-	/*
-	 * Close all AP_VLAN interfaces first, as otherwise they
-	 * might be closed while the AP interface they belong to
-	 * is closed, causing unregister_netdevice_many() to crash.
+	/* Before destroying the interfaces, make sure they're all stopped so
+	 * that the hardware is stopped. Otherwise, the driver might still be
+	 * iterating the interfaces during the shutdown, e.g. from a worker
+	 * or from RX processing or similar, and if it does so (using atomic
+	 * iteration) while we're manipulating the list, the iteration will
+	 * crash.
+	 *
+	 * After this, the hardware should be stopped and the driver should
+	 * have stopped all of its activities, so that we can do RCU-unaware
+	 * manipulations of the interface list below.
 	 */
-	list_for_each_entry(sdata, &local->interfaces, list)
-		if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
-			dev_close(sdata->dev);
+	cfg80211_shutdown_all_interfaces(local->hw.wiphy);
+
+	WARN(local->open_count, "%s: open count remains %d\n",
+	     wiphy_name(local->hw.wiphy), local->open_count);
 
 	mutex_lock(&local->iflist_mtx);
 	list_for_each_entry_safe(sdata, tmp, &local->interfaces, list) {
-- 
cgit v1.2.3-59-g8ed1b


From 042ab5fc7a80b934032fcc673a125feb36645b33 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Thu, 9 Jul 2015 15:35:15 +0200
Subject: wireless: regulatory: reduce log level of CRDA related messages

With a basic Linux userspace, the messages "Calling CRDA to update
world regulatory domain" appears 10 times after boot every second or
so, followed by a final "Exceeded CRDA call max attempts. Not calling
CRDA". For those of us not having the corresponding userspace parts,
having those messages repeatedly displayed at boot time is a bit
annoying, so this commit reduces their log level to pr_debug().

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/reg.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index d359e0610198..29134c81e73c 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -544,15 +544,15 @@ static int call_crda(const char *alpha2)
 	reg_regdb_query(alpha2);
 
 	if (reg_crda_timeouts > REG_MAX_CRDA_TIMEOUTS) {
-		pr_info("Exceeded CRDA call max attempts. Not calling CRDA\n");
+		pr_debug("Exceeded CRDA call max attempts. Not calling CRDA\n");
 		return -EINVAL;
 	}
 
 	if (!is_world_regdom((char *) alpha2))
-		pr_info("Calling CRDA for country: %c%c\n",
+		pr_debug("Calling CRDA for country: %c%c\n",
 			alpha2[0], alpha2[1]);
 	else
-		pr_info("Calling CRDA to update world regulatory domain\n");
+		pr_debug("Calling CRDA to update world regulatory domain\n");
 
 	return kobject_uevent_env(&reg_pdev->dev.kobj, KOBJ_CHANGE, env);
 }
-- 
cgit v1.2.3-59-g8ed1b


From 2ea752cd2ce066c5d8c1807b5310ef329885cecb Mon Sep 17 00:00:00 2001
From: Bob Copeland <me@bobcopeland.com>
Date: Tue, 14 Jul 2015 08:31:55 -0400
Subject: mac80211: correct aid location in peering frames

According to 802.11-2012 8.5.16.3.2 AID comes directly after the
capability bytes in mesh peering confirm frames.  The existing
code, however, was adding a 2 byte offset to this location,
resulting in garbage data going out over the air.  Remove the
offset to fix it.

Signed-off-by: Bob Copeland <me@bobcopeland.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/mesh_plink.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 5438d13e2f00..f17127e754c9 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -306,7 +306,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
 		if (action == WLAN_SP_MESH_PEERING_CONFIRM) {
 			/* AID */
 			pos = skb_put(skb, 2);
-			put_unaligned_le16(plid, pos + 2);
+			put_unaligned_le16(plid, pos);
 		}
 		if (ieee80211_add_srates_ie(sdata, skb, true, band) ||
 		    ieee80211_add_ext_srates_ie(sdata, skb, true, band) ||
-- 
cgit v1.2.3-59-g8ed1b


From b3e7de873df77c1fa0bc2cfaf3eaff757b80e773 Mon Sep 17 00:00:00 2001
From: Bob Copeland <me@bobcopeland.com>
Date: Tue, 14 Jul 2015 08:31:56 -0400
Subject: mac80211: add missing length check for confirm frames

Although mesh_rx_plink_frame() already checks that frames have enough
bytes for the action code plus another two bytes for capability/reason
code, it doesn't take into account that confirm frames also have an
additional two-byte aid.  As a result, a corrupt frame could cause a
subsequent subtraction to wrap around to ill effect.  Add another
check for this case.

Signed-off-by: Bob Copeland <me@bobcopeland.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/mesh_plink.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index f17127e754c9..3b59099413fb 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -1122,6 +1122,9 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata,
 						WLAN_SP_MESH_PEERING_CONFIRM) {
 		baseaddr += 4;
 		baselen += 4;
+
+		if (baselen > len)
+			return;
 	}
 	ieee802_11_parse_elems(baseaddr, len - baselen, true, &elems);
 	mesh_process_plink_frame(sdata, mgmt, &elems);
-- 
cgit v1.2.3-59-g8ed1b


From 923b352f19d9ea971ae2536eab55f5fc9e95fedf Mon Sep 17 00:00:00 2001
From: Arik Nemtsov <arik@wizery.com>
Date: Wed, 8 Jul 2015 15:41:44 +0300
Subject: cfg80211: use RTNL locked reg_can_beacon for IR-relaxation

The RTNL is required to check for IR-relaxation conditions that allow
more channels to beacon. Export an RTNL locked version of reg_can_beacon
and use it where possible in AP/STA interface type flows, where
IR-relaxation may be applicable.

Fixes: 06f207fc5418 ("cfg80211: change GO_CONCURRENT to IR_CONCURRENT for STA")
Signed-off-by: Arik Nemtsov <arikx.nemtsov@intel.com>
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 17 +++++++++++++++++
 net/mac80211/tdls.c    |  6 +++---
 net/wireless/chan.c    | 45 ++++++++++++++++++++++++++++++++++-----------
 net/wireless/nl80211.c | 14 ++++++++------
 net/wireless/reg.c     |  2 +-
 net/wireless/trace.h   | 11 +++++++----
 6 files changed, 70 insertions(+), 25 deletions(-)

(limited to 'net')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index a741678f24a2..883fe1e7c5a1 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -4868,6 +4868,23 @@ bool cfg80211_reg_can_beacon(struct wiphy *wiphy,
 			     struct cfg80211_chan_def *chandef,
 			     enum nl80211_iftype iftype);
 
+/**
+ * cfg80211_reg_can_beacon_relax - check if beaconing is allowed with relaxation
+ * @wiphy: the wiphy
+ * @chandef: the channel definition
+ * @iftype: interface type
+ *
+ * Return: %true if there is no secondary channel or the secondary channel(s)
+ * can be used for beaconing (i.e. is not a radar channel etc.). This version
+ * also checks if IR-relaxation conditions apply, to allow beaconing under
+ * more permissive conditions.
+ *
+ * Requires the RTNL to be held.
+ */
+bool cfg80211_reg_can_beacon_relax(struct wiphy *wiphy,
+				   struct cfg80211_chan_def *chandef,
+				   enum nl80211_iftype iftype);
+
 /*
  * cfg80211_ch_switch_notify - update wdev channel and notify userspace
  * @dev: the device which switched channels
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index ad31b2dab4f5..8db6e2994bbc 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -60,6 +60,7 @@ ieee80211_tdls_add_subband(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_channel *ch;
 	struct cfg80211_chan_def chandef;
 	int i, subband_start;
+	struct wiphy *wiphy = sdata->local->hw.wiphy;
 
 	for (i = start; i <= end; i += spacing) {
 		if (!ch_cnt)
@@ -70,9 +71,8 @@ ieee80211_tdls_add_subband(struct ieee80211_sub_if_data *sdata,
 			/* we will be active on the channel */
 			cfg80211_chandef_create(&chandef, ch,
 						NL80211_CHAN_NO_HT);
-			if (cfg80211_reg_can_beacon(sdata->local->hw.wiphy,
-						    &chandef,
-						    sdata->wdev.iftype)) {
+			if (cfg80211_reg_can_beacon_relax(wiphy, &chandef,
+							  sdata->wdev.iftype)) {
 				ch_cnt++;
 				/*
 				 * check if the next channel is also part of
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index 915b328b9ac5..59cabc9bce69 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -797,23 +797,18 @@ static bool cfg80211_ir_permissive_chan(struct wiphy *wiphy,
 	return false;
 }
 
-bool cfg80211_reg_can_beacon(struct wiphy *wiphy,
-			     struct cfg80211_chan_def *chandef,
-			     enum nl80211_iftype iftype)
+static bool _cfg80211_reg_can_beacon(struct wiphy *wiphy,
+				     struct cfg80211_chan_def *chandef,
+				     enum nl80211_iftype iftype,
+				     bool check_no_ir)
 {
 	bool res;
 	u32 prohibited_flags = IEEE80211_CHAN_DISABLED |
 			       IEEE80211_CHAN_RADAR;
 
-	trace_cfg80211_reg_can_beacon(wiphy, chandef, iftype);
+	trace_cfg80211_reg_can_beacon(wiphy, chandef, iftype, check_no_ir);
 
-	/*
-	 * Under certain conditions suggested by some regulatory bodies a
-	 * GO/STA can IR on channels marked with IEEE80211_NO_IR. Set this flag
-	 * only if such relaxations are not enabled and the conditions are not
-	 * met.
-	 */
-	if (!cfg80211_ir_permissive_chan(wiphy, iftype, chandef->chan))
+	if (check_no_ir)
 		prohibited_flags |= IEEE80211_CHAN_NO_IR;
 
 	if (cfg80211_chandef_dfs_required(wiphy, chandef, iftype) > 0 &&
@@ -827,8 +822,36 @@ bool cfg80211_reg_can_beacon(struct wiphy *wiphy,
 	trace_cfg80211_return_bool(res);
 	return res;
 }
+
+bool cfg80211_reg_can_beacon(struct wiphy *wiphy,
+			     struct cfg80211_chan_def *chandef,
+			     enum nl80211_iftype iftype)
+{
+	return _cfg80211_reg_can_beacon(wiphy, chandef, iftype, true);
+}
 EXPORT_SYMBOL(cfg80211_reg_can_beacon);
 
+bool cfg80211_reg_can_beacon_relax(struct wiphy *wiphy,
+				   struct cfg80211_chan_def *chandef,
+				   enum nl80211_iftype iftype)
+{
+	bool check_no_ir;
+
+	ASSERT_RTNL();
+
+	/*
+	 * Under certain conditions suggested by some regulatory bodies a
+	 * GO/STA can IR on channels marked with IEEE80211_NO_IR. Set this flag
+	 * only if such relaxations are not enabled and the conditions are not
+	 * met.
+	 */
+	check_no_ir = !cfg80211_ir_permissive_chan(wiphy, iftype,
+						   chandef->chan);
+
+	return _cfg80211_reg_can_beacon(wiphy, chandef, iftype, check_no_ir);
+}
+EXPORT_SYMBOL(cfg80211_reg_can_beacon_relax);
+
 int cfg80211_set_monitor_channel(struct cfg80211_registered_device *rdev,
 				 struct cfg80211_chan_def *chandef)
 {
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index c264effd00a6..76b41578a838 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -2003,7 +2003,8 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev,
 	switch (iftype) {
 	case NL80211_IFTYPE_AP:
 	case NL80211_IFTYPE_P2P_GO:
-		if (!cfg80211_reg_can_beacon(&rdev->wiphy, &chandef, iftype)) {
+		if (!cfg80211_reg_can_beacon_relax(&rdev->wiphy, &chandef,
+						   iftype)) {
 			result = -EINVAL;
 			break;
 		}
@@ -3403,8 +3404,8 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
 	} else if (!nl80211_get_ap_channel(rdev, &params))
 		return -EINVAL;
 
-	if (!cfg80211_reg_can_beacon(&rdev->wiphy, &params.chandef,
-				     wdev->iftype))
+	if (!cfg80211_reg_can_beacon_relax(&rdev->wiphy, &params.chandef,
+					   wdev->iftype))
 		return -EINVAL;
 
 	if (info->attrs[NL80211_ATTR_ACL_POLICY]) {
@@ -6492,8 +6493,8 @@ skip_beacons:
 	if (err)
 		return err;
 
-	if (!cfg80211_reg_can_beacon(&rdev->wiphy, &params.chandef,
-				     wdev->iftype))
+	if (!cfg80211_reg_can_beacon_relax(&rdev->wiphy, &params.chandef,
+					   wdev->iftype))
 		return -EINVAL;
 
 	err = cfg80211_chandef_dfs_required(wdev->wiphy,
@@ -10170,7 +10171,8 @@ static int nl80211_tdls_channel_switch(struct sk_buff *skb,
 		return -EINVAL;
 
 	/* we will be active on the TDLS link */
-	if (!cfg80211_reg_can_beacon(&rdev->wiphy, &chandef, wdev->iftype))
+	if (!cfg80211_reg_can_beacon_relax(&rdev->wiphy, &chandef,
+					   wdev->iftype))
 		return -EINVAL;
 
 	/* don't allow switching to DFS channels */
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 29134c81e73c..aa2d75482017 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -1589,7 +1589,7 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev)
 	case NL80211_IFTYPE_AP:
 	case NL80211_IFTYPE_P2P_GO:
 	case NL80211_IFTYPE_ADHOC:
-		return cfg80211_reg_can_beacon(wiphy, &chandef, iftype);
+		return cfg80211_reg_can_beacon_relax(wiphy, &chandef, iftype);
 	case NL80211_IFTYPE_STATION:
 	case NL80211_IFTYPE_P2P_CLIENT:
 		return cfg80211_chandef_usable(wiphy, &chandef,
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index af3617c9879e..a808279a432a 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -2358,20 +2358,23 @@ TRACE_EVENT(cfg80211_cqm_rssi_notify,
 
 TRACE_EVENT(cfg80211_reg_can_beacon,
 	TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef,
-		 enum nl80211_iftype iftype),
-	TP_ARGS(wiphy, chandef, iftype),
+		 enum nl80211_iftype iftype, bool check_no_ir),
+	TP_ARGS(wiphy, chandef, iftype, check_no_ir),
 	TP_STRUCT__entry(
 		WIPHY_ENTRY
 		CHAN_DEF_ENTRY
 		__field(enum nl80211_iftype, iftype)
+		__field(bool, check_no_ir)
 	),
 	TP_fast_assign(
 		WIPHY_ASSIGN;
 		CHAN_DEF_ASSIGN(chandef);
 		__entry->iftype = iftype;
+		__entry->check_no_ir = check_no_ir;
 	),
-	TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT ", iftype=%d",
-		  WIPHY_PR_ARG, CHAN_DEF_PR_ARG, __entry->iftype)
+	TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT ", iftype=%d check_no_ir=%s",
+		  WIPHY_PR_ARG, CHAN_DEF_PR_ARG, __entry->iftype,
+		  BOOL_TO_STR(__entry->check_no_ir))
 );
 
 TRACE_EVENT(cfg80211_chandef_dfs_required,
-- 
cgit v1.2.3-59-g8ed1b


From fdbf5b097bbd9693a86c0b8bfdd071a9a2117cfc Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 20 Jul 2015 17:55:38 +0800
Subject: Revert "sit: Add gro callbacks to sit_offload"

This patch reverts 19424e052fb44da2f00d1a868cbb51f3e9f4bbb5 ("sit:
Add gro callbacks to sit_offload") because it generates packets
that cannot be handled even by our own GSO.

Reported-by: Wolfgang Walter <linux@stwm.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_offload.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index e893cd18612f..08b62047c67f 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -292,8 +292,6 @@ static struct packet_offload ipv6_packet_offload __read_mostly = {
 static const struct net_offload sit_offload = {
 	.callbacks = {
 		.gso_segment	= ipv6_gso_segment,
-		.gro_receive	= ipv6_gro_receive,
-		.gro_complete	= ipv6_gro_complete,
 	},
 };
 
-- 
cgit v1.2.3-59-g8ed1b


From b8a23e8d8e31abeda2f6cfa36a772414b2a86ffc Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 17 Jul 2015 10:19:23 +0200
Subject: caif: fix leaks and race in caif_queue_rcv_skb()

1) If sk_filter() is applied, skb was leaked (not freed)
2) Testing SOCK_DEAD twice is racy :
   packet could be freed while already queued.
3) Remove obsolete comment about caching skb->len

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/caif_socket.c | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 3cc71b9f5517..cc858919108e 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -121,12 +121,13 @@ static void caif_flow_ctrl(struct sock *sk, int mode)
  * Copied from sock.c:sock_queue_rcv_skb(), but changed so packets are
  * not dropped, but CAIF is sending flow off instead.
  */
-static int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+static void caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 {
 	int err;
 	unsigned long flags;
 	struct sk_buff_head *list = &sk->sk_receive_queue;
 	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
+	bool queued = false;
 
 	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
 		(unsigned int)sk->sk_rcvbuf && rx_flow_is_on(cf_sk)) {
@@ -139,7 +140,8 @@ static int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 
 	err = sk_filter(sk, skb);
 	if (err)
-		return err;
+		goto out;
+
 	if (!sk_rmem_schedule(sk, skb, skb->truesize) && rx_flow_is_on(cf_sk)) {
 		set_rx_flow_off(cf_sk);
 		net_dbg_ratelimited("sending flow OFF due to rmem_schedule\n");
@@ -147,21 +149,16 @@ static int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	}
 	skb->dev = NULL;
 	skb_set_owner_r(skb, sk);
-	/* Cache the SKB length before we tack it onto the receive
-	 * queue. Once it is added it no longer belongs to us and
-	 * may be freed by other threads of control pulling packets
-	 * from the queue.
-	 */
 	spin_lock_irqsave(&list->lock, flags);
-	if (!sock_flag(sk, SOCK_DEAD))
+	queued = !sock_flag(sk, SOCK_DEAD);
+	if (queued)
 		__skb_queue_tail(list, skb);
 	spin_unlock_irqrestore(&list->lock, flags);
-
-	if (!sock_flag(sk, SOCK_DEAD))
+out:
+	if (queued)
 		sk->sk_data_ready(sk);
 	else
 		kfree_skb(skb);
-	return 0;
 }
 
 /* Packet Receive Callback function called from CAIF Stack */
-- 
cgit v1.2.3-59-g8ed1b


From 8bf4ada2e21378816b28205427ee6b0e1ca4c5f1 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Date: Fri, 17 Jul 2015 14:01:11 +0300
Subject: net: ratelimit warnings about dst entry refcount underflow or
 overflow

Kernel generates a lot of warnings when dst entry reference counter
overflows and becomes negative. That bug was seen several times at
machines with outdated 3.10.y kernels. Most like it's already fixed
in upstream. Anyway that flood completely kills machine and makes
further debugging impossible.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dst.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/dst.c b/net/core/dst.c
index e956ce6d1378..002144bea935 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -284,7 +284,9 @@ void dst_release(struct dst_entry *dst)
 		int newrefcnt;
 
 		newrefcnt = atomic_dec_return(&dst->__refcnt);
-		WARN_ON(newrefcnt < 0);
+		if (unlikely(newrefcnt < 0))
+			net_warn_ratelimited("%s: dst:%p refcnt:%d\n",
+					     __func__, dst, newrefcnt);
 		if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt)
 			call_rcu(&dst->rcu_head, dst_destroy_rcu);
 	}
-- 
cgit v1.2.3-59-g8ed1b


From f6bfc46da6292b630ba389592123f0dd02066172 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 17 Jul 2015 22:38:43 +0200
Subject: sched: cls_bpf: fix panic on filter replace

The following test case causes a NULL pointer dereference in cls_bpf:

  FOO="1,6 0 0 4294967295,"
  tc filter add dev foo parent 1: bpf bytecode "$FOO" flowid 1:1 action ok
  tc filter replace dev foo parent 1: pref 49152 handle 0x1 \
            bpf bytecode "$FOO" flowid 1:1 action drop

The problem is that commit 1f947bf151e9 ("net: sched: rcu'ify cls_bpf")
accidentally swapped the arguments of list_replace_rcu(), the old
element needs to be the first argument and the new element the second.

Fixes: 1f947bf151e9 ("net: sched: rcu'ify cls_bpf")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.r.fastabend@intel.com>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_bpf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index c79ecfd36e0f..e5168f8b9640 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -378,7 +378,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
 		goto errout;
 
 	if (oldprog) {
-		list_replace_rcu(&prog->link, &oldprog->link);
+		list_replace_rcu(&oldprog->link, &prog->link);
 		tcf_unbind_filter(tp, &oldprog->res);
 		call_rcu(&oldprog->rcu, __cls_bpf_delete_prog);
 	} else {
-- 
cgit v1.2.3-59-g8ed1b


From ff3532f2655b79058fec035ca54fced69a483084 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 17 Jul 2015 22:38:44 +0200
Subject: sched: cls_flower: fix panic on filter replace

The following test case causes a NULL pointer dereference in cls_flower:

  tc filter add dev foo parent 1: flower eth_type ipv4 action ok flowid 1:1
  tc filter replace dev foo parent 1: pref 49152 handle 0x1 \
            flower eth_type ipv6 action ok flowid 1:1

The problem is that commit 77b9900ef53a ("tc: introduce Flower classifier")
accidentally swapped the arguments of list_replace_rcu(), the old
element needs to be the first argument and the new element the second.

Fixes: 77b9900ef53a ("tc: introduce Flower classifier")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_flower.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 9d37ccd95062..2f3d03f99487 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -499,7 +499,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 	*arg = (unsigned long) fnew;
 
 	if (fold) {
-		list_replace_rcu(&fnew->list, &fold->list);
+		list_replace_rcu(&fold->list, &fnew->list);
 		tcf_unbind_filter(tp, &fold->res);
 		call_rcu(&fold->rcu, fl_destroy_filter);
 	} else {
-- 
cgit v1.2.3-59-g8ed1b


From 32b2f4b196b37695fdb42b31afcbc15399d6ef91 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 17 Jul 2015 22:38:45 +0200
Subject: sched: cls_flow: fix panic on filter replace

The following test case causes a NULL pointer dereference in cls_flow:

  tc filter add dev foo parent 1: handle 0x1 flow hash keys dst action ok
  tc filter replace dev foo parent 1: pref 49152 handle 0x1 \
            flow hash keys mark action drop

To be more precise, actually two different panics are fixed, the first
occurs because tcf_exts_init() is not called on the newly allocated
filter when we do a replace. And the second panic uncovered after that
happens since the arguments of list_replace_rcu() are swapped, the old
element needs to be the first argument and the new element the second.

Fixes: 70da9f0bf999 ("net: sched: cls_flow use RCU")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_flow.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 76bc3a20ffdb..bb2a0f529c1f 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -425,6 +425,8 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
 	if (!fnew)
 		goto err2;
 
+	tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE);
+
 	fold = (struct flow_filter *)*arg;
 	if (fold) {
 		err = -EINVAL;
@@ -486,7 +488,6 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
 		fnew->mask  = ~0U;
 		fnew->tp = tp;
 		get_random_bytes(&fnew->hashrnd, 4);
-		tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE);
 	}
 
 	fnew->perturb_timer.function = flow_perturbation;
@@ -526,7 +527,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
 	if (*arg == 0)
 		list_add_tail_rcu(&fnew->list, &head->filters);
 	else
-		list_replace_rcu(&fnew->list, &fold->list);
+		list_replace_rcu(&fold->list, &fnew->list);
 
 	*arg = (unsigned long)fnew;
 
-- 
cgit v1.2.3-59-g8ed1b


From 0848f6428ba3a2e42db124d41ac6f548655735bf Mon Sep 17 00:00:00 2001
From: Edward Hyunkoo Jee <edjee@google.com>
Date: Tue, 21 Jul 2015 09:43:59 +0200
Subject: inet: frags: fix defragmented packet's IP header for af_packet

When ip_frag_queue() computes positions, it assumes that the passed
sk_buff does not contain L2 headers.

However, when PACKET_FANOUT_FLAG_DEFRAG is used, IP reassembly
functions can be called on outgoing packets that contain L2 headers.

Also, IPv4 checksum is not corrected after reassembly.

Fixes: 7736d33f4262 ("packet: Add pre-defragmentation support for ipv4 fanouts.")
Signed-off-by: Edward Hyunkoo Jee <edjee@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Willem de Bruijn <willemb@google.com>
Cc: Jerry Chu <hkchu@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_fragment.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index a50dc6d408d1..31f71b15cfba 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -351,7 +351,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 	ihl = ip_hdrlen(skb);
 
 	/* Determine the position of this fragment. */
-	end = offset + skb->len - ihl;
+	end = offset + skb->len - skb_network_offset(skb) - ihl;
 	err = -EINVAL;
 
 	/* Is this the final fragment? */
@@ -381,7 +381,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 		goto err;
 
 	err = -ENOMEM;
-	if (!pskb_pull(skb, ihl))
+	if (!pskb_pull(skb, skb_network_offset(skb) + ihl))
 		goto err;
 
 	err = pskb_trim_rcsum(skb, end - offset);
@@ -641,6 +641,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 		iph->frag_off = 0;
 	}
 
+	ip_send_check(iph);
+
 	IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS);
 	qp->q.fragments = NULL;
 	qp->q.fragments_tail = NULL;
-- 
cgit v1.2.3-59-g8ed1b


From 89e478a2aa58af2548b7f316e4d5b6bcc9eade5b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 22 Jul 2015 07:02:00 +0200
Subject: tcp: suppress a division by zero warning

Andrew Morton reported following warning on one ARM build
with gcc-4.4 :

net/ipv4/inet_hashtables.c: In function 'inet_ehash_locks_alloc':
net/ipv4/inet_hashtables.c:617: warning: division by zero

Even guarded with a test on sizeof(spinlock_t), compiler does not
like current construct on a !CONFIG_SMP build.

Remove the warning by using a temporary variable.

Fixes: 095dc8e0c368 ("tcp: fix/cleanup inet_ehash_locks_alloc()")
Reported-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_hashtables.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 5f9b063bbe8a..0cb9165421d4 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -624,22 +624,21 @@ EXPORT_SYMBOL_GPL(inet_hashinfo_init);
 
 int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo)
 {
+	unsigned int locksz = sizeof(spinlock_t);
 	unsigned int i, nblocks = 1;
 
-	if (sizeof(spinlock_t) != 0) {
+	if (locksz != 0) {
 		/* allocate 2 cache lines or at least one spinlock per cpu */
-		nblocks = max_t(unsigned int,
-				2 * L1_CACHE_BYTES / sizeof(spinlock_t),
-				1);
+		nblocks = max(2U * L1_CACHE_BYTES / locksz, 1U);
 		nblocks = roundup_pow_of_two(nblocks * num_possible_cpus());
 
 		/* no more locks than number of hash buckets */
 		nblocks = min(nblocks, hashinfo->ehash_mask + 1);
 
-		hashinfo->ehash_locks =	kmalloc_array(nblocks, sizeof(spinlock_t),
+		hashinfo->ehash_locks =	kmalloc_array(nblocks, locksz,
 						      GFP_KERNEL | __GFP_NOWARN);
 		if (!hashinfo->ehash_locks)
-			hashinfo->ehash_locks = vmalloc(nblocks * sizeof(spinlock_t));
+			hashinfo->ehash_locks = vmalloc(nblocks * locksz);
 
 		if (!hashinfo->ehash_locks)
 			return -ENOMEM;
-- 
cgit v1.2.3-59-g8ed1b


From 0470eb99b4721586ccac954faac3fa4472da0845 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 21 Jul 2015 16:33:50 +0200
Subject: netlink: don't hold mutex in rcu callback when releasing mmapd ring

Kirill A. Shutemov says:

This simple test-case trigers few locking asserts in kernel:

int main(int argc, char **argv)
{
        unsigned int block_size = 16 * 4096;
        struct nl_mmap_req req = {
                .nm_block_size          = block_size,
                .nm_block_nr            = 64,
                .nm_frame_size          = 16384,
                .nm_frame_nr            = 64 * block_size / 16384,
        };
        unsigned int ring_size;
	int fd;

	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
        if (setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, &req, sizeof(req)) < 0)
                exit(1);
        if (setsockopt(fd, SOL_NETLINK, NETLINK_TX_RING, &req, sizeof(req)) < 0)
                exit(1);

	ring_size = req.nm_block_nr * req.nm_block_size;
	mmap(NULL, 2 * ring_size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
	return 0;
}

+++ exited with 0 +++
BUG: sleeping function called from invalid context at /home/kas/git/public/linux-mm/kernel/locking/mutex.c:616
in_atomic(): 1, irqs_disabled(): 0, pid: 1, name: init
3 locks held by init/1:
 #0:  (reboot_mutex){+.+...}, at: [<ffffffff81080959>] SyS_reboot+0xa9/0x220
 #1:  ((reboot_notifier_list).rwsem){.+.+..}, at: [<ffffffff8107f379>] __blocking_notifier_call_chain+0x39/0x70
 #2:  (rcu_callback){......}, at: [<ffffffff810d32e0>] rcu_do_batch.isra.49+0x160/0x10c0
Preemption disabled at:[<ffffffff8145365f>] __delay+0xf/0x20

CPU: 1 PID: 1 Comm: init Not tainted 4.1.0-00009-gbddf4c4818e0 #253
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS Debian-1.8.2-1 04/01/2014
 ffff88017b3d8000 ffff88027bc03c38 ffffffff81929ceb 0000000000000102
 0000000000000000 ffff88027bc03c68 ffffffff81085a9d 0000000000000002
 ffffffff81ca2a20 0000000000000268 0000000000000000 ffff88027bc03c98
Call Trace:
 <IRQ>  [<ffffffff81929ceb>] dump_stack+0x4f/0x7b
 [<ffffffff81085a9d>] ___might_sleep+0x16d/0x270
 [<ffffffff81085bed>] __might_sleep+0x4d/0x90
 [<ffffffff8192e96f>] mutex_lock_nested+0x2f/0x430
 [<ffffffff81932fed>] ? _raw_spin_unlock_irqrestore+0x5d/0x80
 [<ffffffff81464143>] ? __this_cpu_preempt_check+0x13/0x20
 [<ffffffff8182fc3d>] netlink_set_ring+0x1ed/0x350
 [<ffffffff8182e000>] ? netlink_undo_bind+0x70/0x70
 [<ffffffff8182fe20>] netlink_sock_destruct+0x80/0x150
 [<ffffffff817e484d>] __sk_free+0x1d/0x160
 [<ffffffff817e49a9>] sk_free+0x19/0x20
[..]

Cong Wang says:

We can't hold mutex lock in a rcu callback, [..]

Thomas Graf says:

The socket should be dead at this point. It might be simpler to
add a netlink_release_ring() function which doesn't require
locking at all.

Reported-by: "Kirill A. Shutemov" <kirill@shutemov.name>
Diagnosed-by: Cong Wang <cwang@twopensource.com>
Suggested-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/af_netlink.c | 79 ++++++++++++++++++++++++++++--------------------
 1 file changed, 47 insertions(+), 32 deletions(-)

(limited to 'net')

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 9a0ae7172f92..d8e2e3918ce2 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -357,25 +357,52 @@ err1:
 	return NULL;
 }
 
+
+static void
+__netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, bool tx_ring, void **pg_vec,
+		   unsigned int order)
+{
+	struct netlink_sock *nlk = nlk_sk(sk);
+	struct sk_buff_head *queue;
+	struct netlink_ring *ring;
+
+	queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
+	ring  = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
+
+	spin_lock_bh(&queue->lock);
+
+	ring->frame_max		= req->nm_frame_nr - 1;
+	ring->head		= 0;
+	ring->frame_size	= req->nm_frame_size;
+	ring->pg_vec_pages	= req->nm_block_size / PAGE_SIZE;
+
+	swap(ring->pg_vec_len, req->nm_block_nr);
+	swap(ring->pg_vec_order, order);
+	swap(ring->pg_vec, pg_vec);
+
+	__skb_queue_purge(queue);
+	spin_unlock_bh(&queue->lock);
+
+	WARN_ON(atomic_read(&nlk->mapped));
+
+	if (pg_vec)
+		free_pg_vec(pg_vec, order, req->nm_block_nr);
+}
+
 static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req,
-			    bool closing, bool tx_ring)
+			    bool tx_ring)
 {
 	struct netlink_sock *nlk = nlk_sk(sk);
 	struct netlink_ring *ring;
-	struct sk_buff_head *queue;
 	void **pg_vec = NULL;
 	unsigned int order = 0;
-	int err;
 
 	ring  = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
-	queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
 
-	if (!closing) {
-		if (atomic_read(&nlk->mapped))
-			return -EBUSY;
-		if (atomic_read(&ring->pending))
-			return -EBUSY;
-	}
+	if (atomic_read(&nlk->mapped))
+		return -EBUSY;
+	if (atomic_read(&ring->pending))
+		return -EBUSY;
 
 	if (req->nm_block_nr) {
 		if (ring->pg_vec != NULL)
@@ -407,31 +434,19 @@ static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req,
 			return -EINVAL;
 	}
 
-	err = -EBUSY;
 	mutex_lock(&nlk->pg_vec_lock);
-	if (closing || atomic_read(&nlk->mapped) == 0) {
-		err = 0;
-		spin_lock_bh(&queue->lock);
-
-		ring->frame_max		= req->nm_frame_nr - 1;
-		ring->head		= 0;
-		ring->frame_size	= req->nm_frame_size;
-		ring->pg_vec_pages	= req->nm_block_size / PAGE_SIZE;
-
-		swap(ring->pg_vec_len, req->nm_block_nr);
-		swap(ring->pg_vec_order, order);
-		swap(ring->pg_vec, pg_vec);
-
-		__skb_queue_purge(queue);
-		spin_unlock_bh(&queue->lock);
-
-		WARN_ON(atomic_read(&nlk->mapped));
+	if (atomic_read(&nlk->mapped) == 0) {
+		__netlink_set_ring(sk, req, tx_ring, pg_vec, order);
+		mutex_unlock(&nlk->pg_vec_lock);
+		return 0;
 	}
+
 	mutex_unlock(&nlk->pg_vec_lock);
 
 	if (pg_vec)
 		free_pg_vec(pg_vec, order, req->nm_block_nr);
-	return err;
+
+	return -EBUSY;
 }
 
 static void netlink_mm_open(struct vm_area_struct *vma)
@@ -900,10 +915,10 @@ static void netlink_sock_destruct(struct sock *sk)
 
 		memset(&req, 0, sizeof(req));
 		if (nlk->rx_ring.pg_vec)
-			netlink_set_ring(sk, &req, true, false);
+			__netlink_set_ring(sk, &req, false, NULL, 0);
 		memset(&req, 0, sizeof(req));
 		if (nlk->tx_ring.pg_vec)
-			netlink_set_ring(sk, &req, true, true);
+			__netlink_set_ring(sk, &req, true, NULL, 0);
 	}
 #endif /* CONFIG_NETLINK_MMAP */
 
@@ -2223,7 +2238,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
 			return -EINVAL;
 		if (copy_from_user(&req, optval, sizeof(req)))
 			return -EFAULT;
-		err = netlink_set_ring(sk, &req, false,
+		err = netlink_set_ring(sk, &req,
 				       optname == NETLINK_TX_RING);
 		break;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From bac541e46319c2dc6ff9fd5ad6df59cd38686bdb Mon Sep 17 00:00:00 2001
From: Chris J Arges <chris.j.arges@canonical.com>
Date: Tue, 21 Jul 2015 12:36:33 -0500
Subject: openvswitch: allocate nr_node_ids flow_stats instead of
 num_possible_nodes

Some architectures like POWER can have a NUMA node_possible_map that
contains sparse entries. This causes memory corruption with openvswitch
since it allocates flow_cache with a multiple of num_possible_nodes() and
assumes the node variable returned by for_each_node will index into
flow->stats[node].

Use nr_node_ids to allocate a maximal sparse array instead of
num_possible_nodes().

The crash was noticed after 3af229f2 was applied as it changed the
node_possible_map to match node_online_map on boot.
Fixes: 3af229f2071f5b5cb31664be6109561fbe19c861

Signed-off-by: Chris J Arges <chris.j.arges@canonical.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Nishanth Aravamudan <nacc@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/flow_table.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index 4613df8c8290..65523948fb95 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -752,7 +752,7 @@ int ovs_flow_init(void)
 	BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long));
 
 	flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow)
-				       + (num_possible_nodes()
+				       + (nr_node_ids
 					  * sizeof(struct flow_stats *)),
 				       0, 0, NULL);
 	if (flow_cache == NULL)
-- 
cgit v1.2.3-59-g8ed1b