From f623f75ae443d0c771635d51cc986b9d389bf631 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 14 Aug 2018 00:35:24 +0200
Subject: rfkill-gpio: include linux/mod_devicetable.h

One more driver is apparently broken by the recent change
to linux/platform_device.h:

net/rfkill/rfkill-gpio.c: In function 'rfkill_gpio_acpi_probe':
net/rfkill/rfkill-gpio.c:82:29: error: dereferencing pointer to incomplete type 'const struct acpi_device_id'

Include linux/mod_devicetable.h to get the definition of the
acpi_device_id structure.

Fixes: ac3167257b9f ("headers: separate linux/mod_devicetable.h from linux/platform_device.h")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/rfkill/rfkill-gpio.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c
index 00192a996be0..0f8465852254 100644
--- a/net/rfkill/rfkill-gpio.c
+++ b/net/rfkill/rfkill-gpio.c
@@ -20,6 +20,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/rfkill.h>
 #include <linux/platform_device.h>
 #include <linux/clk.h>
-- 
cgit v1.2.3-59-g8ed1b


From 77cfaf52eca5cac30ed029507e0cab065f888995 Mon Sep 17 00:00:00 2001
From: Toke Høiland-Jørgensen <toke@toke.dk>
Date: Mon, 13 Aug 2018 14:16:25 +0200
Subject: mac80211: Run TXQ teardown code before de-registering interfaces
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The TXQ teardown code can reference the vif data structures that are
stored in the netdev private memory area if there are still packets on
the queue when it is being freed. Since the TXQ teardown code is run
after the netdevs are freed, this can lead to a use-after-free. Fix this
by moving the TXQ teardown code to earlier in ieee80211_unregister_hw().

Reported-by: Ben Greear <greearb@candelatech.com>
Tested-by: Ben Greear <greearb@candelatech.com>
Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index fb73451ed85e..0358f20b675f 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -1182,6 +1182,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
 #if IS_ENABLED(CONFIG_IPV6)
 	unregister_inet6addr_notifier(&local->ifa6_notifier);
 #endif
+	ieee80211_txq_teardown_flows(local);
 
 	rtnl_lock();
 
@@ -1210,7 +1211,6 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
 	skb_queue_purge(&local->skb_queue);
 	skb_queue_purge(&local->skb_queue_unreliable);
 	skb_queue_purge(&local->skb_queue_tdls_chsw);
-	ieee80211_txq_teardown_flows(local);
 
 	destroy_workqueue(local->workqueue);
 	wiphy_unregister(local->hw.wiphy);
-- 
cgit v1.2.3-59-g8ed1b


From 8a54d8fc160e67ad485d95a0322ce1221f80770a Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 18 Jun 2018 09:29:57 +0200
Subject: cfg80211: remove division by size of sizeof(struct
 ieee80211_wmm_rule)

Pointer arithmetic already adjusts by the size of the struct,
so the sizeof() calculation is wrong. This is basically the
same as Colin King's patch for similar code in the iwlwifi
driver.

Fixes: 230ebaa189af ("cfg80211: read wmm rules from regulatory database")
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/reg.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 4fc66a117b7d..283902974fbf 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -452,8 +452,7 @@ reg_copy_regd(const struct ieee80211_regdomain *src_regd)
 			continue;
 
 		regd->reg_rules[i].wmm_rule = d_wmm +
-			(src_regd->reg_rules[i].wmm_rule - s_wmm) /
-			sizeof(struct ieee80211_wmm_rule);
+			(src_regd->reg_rules[i].wmm_rule - s_wmm);
 	}
 	return regd;
 }
-- 
cgit v1.2.3-59-g8ed1b


From bab1be79a5169ac748d8292b20c86d874022d7ba Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 27 Aug 2018 18:38:31 +0800
Subject: sctp: hold transport before accessing its asoc in
 sctp_transport_get_next

As Marcelo noticed, in sctp_transport_get_next, it is iterating over
transports but then also accessing the association directly, without
checking any refcnts before that, which can cause an use-after-free
Read.

So fix it by holding transport before accessing the association. With
that, sctp_transport_hold calls can be removed in the later places.

Fixes: 626d16f50f39 ("sctp: export some apis or variables for sctp_diag and reuse some for proc")
Reported-by: syzbot+fe62a0c9aa6a85c6de16@syzkaller.appspotmail.com
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/proc.c   |  4 ----
 net/sctp/socket.c | 22 +++++++++++++++-------
 2 files changed, 15 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index ef5c9a82d4e8..4d6f1c8d6659 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -264,8 +264,6 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
 	}
 
 	transport = (struct sctp_transport *)v;
-	if (!sctp_transport_hold(transport))
-		return 0;
 	assoc = transport->asoc;
 	epb = &assoc->base;
 	sk = epb->sk;
@@ -322,8 +320,6 @@ static int sctp_remaddr_seq_show(struct seq_file *seq, void *v)
 	}
 
 	transport = (struct sctp_transport *)v;
-	if (!sctp_transport_hold(transport))
-		return 0;
 	assoc = transport->asoc;
 
 	list_for_each_entry_rcu(tsp, &assoc->peer.transport_addr_list,
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index e96b15a66aba..aa76586a1a1c 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -5005,9 +5005,14 @@ struct sctp_transport *sctp_transport_get_next(struct net *net,
 			break;
 		}
 
+		if (!sctp_transport_hold(t))
+			continue;
+
 		if (net_eq(sock_net(t->asoc->base.sk), net) &&
 		    t->asoc->peer.primary_path == t)
 			break;
+
+		sctp_transport_put(t);
 	}
 
 	return t;
@@ -5017,13 +5022,18 @@ struct sctp_transport *sctp_transport_get_idx(struct net *net,
 					      struct rhashtable_iter *iter,
 					      int pos)
 {
-	void *obj = SEQ_START_TOKEN;
+	struct sctp_transport *t;
 
-	while (pos && (obj = sctp_transport_get_next(net, iter)) &&
-	       !IS_ERR(obj))
-		pos--;
+	if (!pos)
+		return SEQ_START_TOKEN;
 
-	return obj;
+	while ((t = sctp_transport_get_next(net, iter)) && !IS_ERR(t)) {
+		if (!--pos)
+			break;
+		sctp_transport_put(t);
+	}
+
+	return t;
 }
 
 int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *),
@@ -5082,8 +5092,6 @@ again:
 
 	tsp = sctp_transport_get_idx(net, &hti, *pos + 1);
 	for (; !IS_ERR_OR_NULL(tsp); tsp = sctp_transport_get_next(net, &hti)) {
-		if (!sctp_transport_hold(tsp))
-			continue;
 		ret = cb(tsp, p);
 		if (ret)
 			break;
-- 
cgit v1.2.3-59-g8ed1b


From 834539e69a5fe2aab33cc777ccfd4a4fcc5b9770 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 27 Aug 2018 18:40:18 +0800
Subject: sctp: remove useless start_fail from sctp_ht_iter in proc

After changing rhashtable_walk_start to return void, start_fail would
never be set other value than 0, and the checking for start_fail is
pointless, so remove it.

Fixes: 97a6ec4ac021 ("rhashtable: Change rhashtable_walk_start to return void")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/proc.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'net')

diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 4d6f1c8d6659..a644292f9faf 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -215,7 +215,6 @@ static const struct seq_operations sctp_eps_ops = {
 struct sctp_ht_iter {
 	struct seq_net_private p;
 	struct rhashtable_iter hti;
-	int start_fail;
 };
 
 static void *sctp_transport_seq_start(struct seq_file *seq, loff_t *pos)
@@ -224,7 +223,6 @@ static void *sctp_transport_seq_start(struct seq_file *seq, loff_t *pos)
 
 	sctp_transport_walk_start(&iter->hti);
 
-	iter->start_fail = 0;
 	return sctp_transport_get_idx(seq_file_net(seq), &iter->hti, *pos);
 }
 
@@ -232,8 +230,6 @@ static void sctp_transport_seq_stop(struct seq_file *seq, void *v)
 {
 	struct sctp_ht_iter *iter = seq->private;
 
-	if (iter->start_fail)
-		return;
 	sctp_transport_walk_stop(&iter->hti);
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 84581bdae9587023cea1d139523f0ef0f28bd88d Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 27 Aug 2018 18:41:32 +0800
Subject: erspan: set erspan_ver to 1 by default when adding an erspan dev

After erspan_ver is introudced, if erspan_ver is not set in iproute, its
value will be left 0 by default. Since Commit 02f99df1875c ("erspan: fix
invalid erspan version."), it has broken the traffic due to the version
check in erspan_xmit if users are not aware of 'erspan_ver' param, like
using an old version of iproute.

To fix this compatibility problem, it sets erspan_ver to 1 by default
when adding an erspan dev in erspan_setup. Note that we can't do it in
ipgre_netlink_parms, as this function is also used by ipgre_changelink.

Fixes: 02f99df1875c ("erspan: fix invalid erspan version.")
Reported-by: Jianlin Shi <jishi@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_gre.c  | 3 +++
 net/ipv6/ip6_gre.c | 1 +
 2 files changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 51a5d06085ac..ae714aecc31c 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1508,11 +1508,14 @@ nla_put_failure:
 
 static void erspan_setup(struct net_device *dev)
 {
+	struct ip_tunnel *t = netdev_priv(dev);
+
 	ether_setup(dev);
 	dev->netdev_ops = &erspan_netdev_ops;
 	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
 	ip_tunnel_setup(dev, erspan_net_id);
+	t->erspan_ver = 1;
 }
 
 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 18a3794b0f52..e493b041d4ac 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1778,6 +1778,7 @@ static void ip6gre_netlink_parms(struct nlattr *data[],
 	if (data[IFLA_GRE_COLLECT_METADATA])
 		parms->collect_md = true;
 
+	parms->erspan_ver = 1;
 	if (data[IFLA_GRE_ERSPAN_VER])
 		parms->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
 
-- 
cgit v1.2.3-59-g8ed1b


From d5ed72a55bc0a321ef33d272e2b0bf0d2b06d1fe Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Mon, 27 Aug 2018 20:58:43 +0200
Subject: net: sched: fix extack error message when chain is failed to be
 created

Instead "Cannot find" say "Cannot create".

Fixes: c35a4acc2985 ("net: sched: cls_api: handle generic cls errors")
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_api.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 31bd1439cf60..2d41c5b21b48 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1252,7 +1252,7 @@ replay:
 	}
 	chain = tcf_chain_get(block, chain_index, true);
 	if (!chain) {
-		NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
+		NL_SET_ERR_MSG(extack, "Cannot create specified filter chain");
 		err = -ENOMEM;
 		goto errout;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From b7b4247d553939ccf02ff597ec60f41a2f93ee8e Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Mon, 27 Aug 2018 20:58:44 +0200
Subject: net: sched: return -ENOENT when trying to remove filter from
 non-existent chain

When chain 0 was implicitly created, removal of non-existent filter from
chain 0 gave -ENOENT. Once chain 0 became non-implicit, the same call is
giving -EINVAL. Fix this by returning -ENOENT in that case.

Reported-by: Roman Mashak <mrv@mojatatu.com>
Fixes: f71e0ca4db18 ("net: sched: Avoid implicit chain 0 creation")
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_api.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 2d41c5b21b48..1a67af8a6e8c 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1399,7 +1399,7 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 			goto errout;
 		}
 		NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
-		err = -EINVAL;
+		err = -ENOENT;
 		goto errout;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 30935198b7d0be12b1c45c328b66a7fdefb16256 Mon Sep 17 00:00:00 2001
From: Haiqing Bai <Haiqing.Bai@windriver.com>
Date: Mon, 27 Aug 2018 09:32:26 +0800
Subject: tipc: fix the big/little endian issue in tipc_dest

In function tipc_dest_push, the 32bit variables 'node' and 'port'
are stored separately in uppper and lower part of 64bit 'value'.
Then this value is assigned to dst->value which is a union like:
union
{
  struct {
    u32 port;
    u32 node;
  };
  u64 value;
}
This works on little-endian machines like x86 but fails on big-endian
machines.

The fix remove the 'value' stack parameter and even the 'value'
member of the union in tipc_dest, assign the 'node' and 'port' member
directly with the input parameter to avoid the endian issue.

Fixes: a80ae5306a73 ("tipc: improve destination linked list")
Signed-off-by: Zhenbo Gao <zhenbo.gao@windriver.com>
Acked-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Haiqing Bai <Haiqing.Bai@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/name_table.c | 10 ++++------
 net/tipc/name_table.h |  9 ++-------
 2 files changed, 6 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 88f027b502f6..66d5b2c5987a 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -980,20 +980,17 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb)
 
 struct tipc_dest *tipc_dest_find(struct list_head *l, u32 node, u32 port)
 {
-	u64 value = (u64)node << 32 | port;
 	struct tipc_dest *dst;
 
 	list_for_each_entry(dst, l, list) {
-		if (dst->value != value)
-			continue;
-		return dst;
+		if (dst->node == node && dst->port == port)
+			return dst;
 	}
 	return NULL;
 }
 
 bool tipc_dest_push(struct list_head *l, u32 node, u32 port)
 {
-	u64 value = (u64)node << 32 | port;
 	struct tipc_dest *dst;
 
 	if (tipc_dest_find(l, node, port))
@@ -1002,7 +999,8 @@ bool tipc_dest_push(struct list_head *l, u32 node, u32 port)
 	dst = kmalloc(sizeof(*dst), GFP_ATOMIC);
 	if (unlikely(!dst))
 		return false;
-	dst->value = value;
+	dst->node = node;
+	dst->port = port;
 	list_add(&dst->list, l);
 	return true;
 }
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index 0febba41da86..892bd750b85f 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -133,13 +133,8 @@ void tipc_nametbl_stop(struct net *net);
 
 struct tipc_dest {
 	struct list_head list;
-	union {
-		struct {
-			u32 port;
-			u32 node;
-		};
-		u64 value;
-	};
+	u32 port;
+	u32 node;
 };
 
 struct tipc_dest *tipc_dest_find(struct list_head *l, u32 node, u32 port);
-- 
cgit v1.2.3-59-g8ed1b


From ad8619864f0c9bd89e14d957afa3fd8aaf0720da Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Mon, 27 Aug 2018 00:20:11 +0200
Subject: net: dsa: Drop GPIO includes

Commit 52638f71fcff ("dsa: Move gpio reset into switch driver")
moved the GPIO handling into the switch drivers but forgot
to remove the GPIO header includes.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/dsa.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'net')

diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index e63c554e0623..9f3209ff7ffd 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -19,12 +19,10 @@
 #include <linux/of_mdio.h>
 #include <linux/of_platform.h>
 #include <linux/of_net.h>
-#include <linux/of_gpio.h>
 #include <linux/netdevice.h>
 #include <linux/sysfs.h>
 #include <linux/phy_fixed.h>
 #include <linux/ptp_classify.h>
-#include <linux/gpio/consumer.h>
 #include <linux/etherdevice.h>
 
 #include "dsa_priv.h"
-- 
cgit v1.2.3-59-g8ed1b


From 53ae914d898e5dd5984d352d5fa0b23410f966a0 Mon Sep 17 00:00:00 2001
From: Zhu Yanjun <yanjun.zhu@oracle.com>
Date: Sat, 25 Aug 2018 15:19:05 +0800
Subject: net/rds: Use rdma_read_gids to get connection SGID/DGID in IPv6

In IPv4, the newly introduced rdma_read_gids is used to read the SGID/DGID
for the connection which returns GID correctly for RoCE transport as well.

In IPv6, rdma_read_gids is also used. The following are why rdma_read_gids
is introduced.

rdma_addr_get_dgid() for RoCE for client side connections returns MAC
address, instead of DGID.
rdma_addr_get_sgid() for RoCE doesn't return correct SGID for IPv6 and
when more than one IP address is assigned to the netdevice.

So the transport agnostic rdma_read_gids() API is provided by rdma_cm
module.

Signed-off-by: Zhu Yanjun <yanjun.zhu@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/ib.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/rds/ib.c b/net/rds/ib.c
index c1d97640c0be..eba75c1ba359 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -341,15 +341,10 @@ static int rds6_ib_conn_info_visitor(struct rds_connection *conn,
 
 	if (rds_conn_state(conn) == RDS_CONN_UP) {
 		struct rds_ib_device *rds_ibdev;
-		struct rdma_dev_addr *dev_addr;
 
 		ic = conn->c_transport_data;
-		dev_addr = &ic->i_cm_id->route.addr.dev_addr;
-		rdma_addr_get_sgid(dev_addr,
-				   (union ib_gid *)&iinfo6->src_gid);
-		rdma_addr_get_dgid(dev_addr,
-				   (union ib_gid *)&iinfo6->dst_gid);
-
+		rdma_read_gids(ic->i_cm_id, (union ib_gid *)&iinfo6->src_gid,
+			       (union ib_gid *)&iinfo6->dst_gid);
 		rds_ibdev = ic->rds_ibdev;
 		iinfo6->max_send_wr = ic->i_send_ring.w_nr;
 		iinfo6->max_recv_wr = ic->i_recv_ring.w_nr;
-- 
cgit v1.2.3-59-g8ed1b


From 3f6e138d41ddff196f452993528cfe75762ede0f Mon Sep 17 00:00:00 2001
From: Stefan Agner <stefan@agner.ch>
Date: Mon, 27 Aug 2018 21:30:42 +0200
Subject: bpf: fix build error with clang

Building the newly introduced BPF_PROG_TYPE_SK_REUSEPORT leads to
a compile time error when building with clang:
net/core/filter.o: In function `sk_reuseport_convert_ctx_access':
  ../net/core/filter.c:7284: undefined reference to `__compiletime_assert_7284'

It seems that clang has issues resolving hweight_long at compile
time. Since SK_FL_PROTO_MASK is a constant, we can use the interface
for known constant arguments which works fine with clang.

Fixes: 2dbb9b9e6df6 ("bpf: Introduce BPF_PROG_TYPE_SK_REUSEPORT")
Signed-off-by: Stefan Agner <stefan@agner.ch>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 net/core/filter.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/filter.c b/net/core/filter.c
index c25eb36f1320..7a2430945c71 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -7281,7 +7281,7 @@ static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type,
 		break;
 
 	case offsetof(struct sk_reuseport_md, ip_protocol):
-		BUILD_BUG_ON(hweight_long(SK_FL_PROTO_MASK) != BITS_PER_BYTE);
+		BUILD_BUG_ON(HWEIGHT32(SK_FL_PROTO_MASK) != BITS_PER_BYTE);
 		SK_REUSEPORT_LOAD_SK_FIELD_SIZE_OFF(__sk_flags_offset,
 						    BPF_W, 0);
 		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
-- 
cgit v1.2.3-59-g8ed1b


From 67d1ba8a6dc83d90cd58b89fa6cbf9ae35a0cf7f Mon Sep 17 00:00:00 2001
From: Danek Duvall <duvall@comfychair.org>
Date: Wed, 22 Aug 2018 16:01:04 -0700
Subject: mac80211: correct use of IEEE80211_VHT_CAP_RXSTBC_X

The mod mask for VHT capabilities intends to say that you can override
the number of STBC receive streams, and it does, but only by accident.
The IEEE80211_VHT_CAP_RXSTBC_X aren't bits to be set, but values (albeit
left-shifted).  ORing the bits together gets the right answer, but we
should use the _MASK macro here instead.

Signed-off-by: Danek Duvall <duvall@comfychair.org>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/main.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 0358f20b675f..27cd64acaf00 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -470,10 +470,7 @@ static const struct ieee80211_vht_cap mac80211_vht_capa_mod_mask = {
 		cpu_to_le32(IEEE80211_VHT_CAP_RXLDPC |
 			    IEEE80211_VHT_CAP_SHORT_GI_80 |
 			    IEEE80211_VHT_CAP_SHORT_GI_160 |
-			    IEEE80211_VHT_CAP_RXSTBC_1 |
-			    IEEE80211_VHT_CAP_RXSTBC_2 |
-			    IEEE80211_VHT_CAP_RXSTBC_3 |
-			    IEEE80211_VHT_CAP_RXSTBC_4 |
+			    IEEE80211_VHT_CAP_RXSTBC_MASK |
 			    IEEE80211_VHT_CAP_TXSTBC |
 			    IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE |
 			    IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE |
-- 
cgit v1.2.3-59-g8ed1b


From 38cb87ee47fb825f6c9d645c019f75b3905c0ab2 Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <sgruszka@redhat.com>
Date: Wed, 22 Aug 2018 13:52:21 +0200
Subject: cfg80211: make wmm_rule part of the reg_rule structure

Make wmm_rule be part of the reg_rule structure. This simplifies the
code a lot at the cost of having bigger memory usage. However in most
cases we have only few reg_rule's and when we do have many like in
iwlwifi we do not save memory as it allocates a separate wmm_rule for
each channel anyway.

This also fixes a bug reported in various places where somewhere the
pointers were corrupted and we ended up doing a null-dereference.

Fixes: 230ebaa189af ("cfg80211: read wmm rules from regulatory database")
Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
[rephrase commit message slightly]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c | 50 ++----------
 include/net/cfg80211.h                             |  4 +-
 include/net/regulatory.h                           |  4 +-
 net/mac80211/util.c                                |  8 +-
 net/wireless/nl80211.c                             | 10 +--
 net/wireless/reg.c                                 | 90 ++++------------------
 6 files changed, 31 insertions(+), 135 deletions(-)

(limited to 'net')

diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
index b815ba38dbdb..88121548eb9f 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
@@ -877,15 +877,12 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg,
 	const u8 *nvm_chan = cfg->nvm_type == IWL_NVM_EXT ?
 			     iwl_ext_nvm_channels : iwl_nvm_channels;
 	struct ieee80211_regdomain *regd, *copy_rd;
-	int size_of_regd, regd_to_copy, wmms_to_copy;
-	int size_of_wmms = 0;
+	int size_of_regd, regd_to_copy;
 	struct ieee80211_reg_rule *rule;
-	struct ieee80211_wmm_rule *wmm_rule, *d_wmm, *s_wmm;
 	struct regdb_ptrs *regdb_ptrs;
 	enum nl80211_band band;
 	int center_freq, prev_center_freq = 0;
-	int valid_rules = 0, n_wmms = 0;
-	int i;
+	int valid_rules = 0;
 	bool new_rule;
 	int max_num_ch = cfg->nvm_type == IWL_NVM_EXT ?
 			 IWL_NVM_NUM_CHANNELS_EXT : IWL_NVM_NUM_CHANNELS;
@@ -904,11 +901,7 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg,
 		sizeof(struct ieee80211_regdomain) +
 		num_of_ch * sizeof(struct ieee80211_reg_rule);
 
-	if (geo_info & GEO_WMM_ETSI_5GHZ_INFO)
-		size_of_wmms =
-			num_of_ch * sizeof(struct ieee80211_wmm_rule);
-
-	regd = kzalloc(size_of_regd + size_of_wmms, GFP_KERNEL);
+	regd = kzalloc(size_of_regd, GFP_KERNEL);
 	if (!regd)
 		return ERR_PTR(-ENOMEM);
 
@@ -922,8 +915,6 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg,
 	regd->alpha2[0] = fw_mcc >> 8;
 	regd->alpha2[1] = fw_mcc & 0xff;
 
-	wmm_rule = (struct ieee80211_wmm_rule *)((u8 *)regd + size_of_regd);
-
 	for (ch_idx = 0; ch_idx < num_of_ch; ch_idx++) {
 		ch_flags = (u16)__le32_to_cpup(channels + ch_idx);
 		band = (ch_idx < NUM_2GHZ_CHANNELS) ?
@@ -977,26 +968,10 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg,
 		    band == NL80211_BAND_2GHZ)
 			continue;
 
-		if (!reg_query_regdb_wmm(regd->alpha2, center_freq,
-					 &regdb_ptrs[n_wmms].token, wmm_rule)) {
-			/* Add only new rules */
-			for (i = 0; i < n_wmms; i++) {
-				if (regdb_ptrs[i].token ==
-				    regdb_ptrs[n_wmms].token) {
-					rule->wmm_rule = regdb_ptrs[i].rule;
-					break;
-				}
-			}
-			if (i == n_wmms) {
-				rule->wmm_rule = wmm_rule;
-				regdb_ptrs[n_wmms++].rule = wmm_rule;
-				wmm_rule++;
-			}
-		}
+		reg_query_regdb_wmm(regd->alpha2, center_freq, rule);
 	}
 
 	regd->n_reg_rules = valid_rules;
-	regd->n_wmm_rules = n_wmms;
 
 	/*
 	 * Narrow down regdom for unused regulatory rules to prevent hole
@@ -1005,28 +980,13 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg,
 	regd_to_copy = sizeof(struct ieee80211_regdomain) +
 		valid_rules * sizeof(struct ieee80211_reg_rule);
 
-	wmms_to_copy = sizeof(struct ieee80211_wmm_rule) * n_wmms;
-
-	copy_rd = kzalloc(regd_to_copy + wmms_to_copy, GFP_KERNEL);
+	copy_rd = kzalloc(regd_to_copy, GFP_KERNEL);
 	if (!copy_rd) {
 		copy_rd = ERR_PTR(-ENOMEM);
 		goto out;
 	}
 
 	memcpy(copy_rd, regd, regd_to_copy);
-	memcpy((u8 *)copy_rd + regd_to_copy, (u8 *)regd + size_of_regd,
-	       wmms_to_copy);
-
-	d_wmm = (struct ieee80211_wmm_rule *)((u8 *)copy_rd + regd_to_copy);
-	s_wmm = (struct ieee80211_wmm_rule *)((u8 *)regd + size_of_regd);
-
-	for (i = 0; i < regd->n_reg_rules; i++) {
-		if (!regd->reg_rules[i].wmm_rule)
-			continue;
-
-		copy_rd->reg_rules[i].wmm_rule = d_wmm +
-			(regd->reg_rules[i].wmm_rule - s_wmm);
-	}
 
 out:
 	kfree(regdb_ptrs);
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 1beb3ead0385..7229c186d199 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -4763,8 +4763,8 @@ const char *reg_initiator_name(enum nl80211_reg_initiator initiator);
  *
  * Return: 0 on success. -ENODATA.
  */
-int reg_query_regdb_wmm(char *alpha2, int freq, u32 *ptr,
-			struct ieee80211_wmm_rule *rule);
+int reg_query_regdb_wmm(char *alpha2, int freq,
+			struct ieee80211_reg_rule *rule);
 
 /*
  * callbacks for asynchronous cfg80211 methods, notification
diff --git a/include/net/regulatory.h b/include/net/regulatory.h
index 60f8cc86a447..3469750df0f4 100644
--- a/include/net/regulatory.h
+++ b/include/net/regulatory.h
@@ -217,15 +217,15 @@ struct ieee80211_wmm_rule {
 struct ieee80211_reg_rule {
 	struct ieee80211_freq_range freq_range;
 	struct ieee80211_power_rule power_rule;
-	struct ieee80211_wmm_rule *wmm_rule;
+	struct ieee80211_wmm_rule wmm_rule;
 	u32 flags;
 	u32 dfs_cac_ms;
+	bool has_wmm;
 };
 
 struct ieee80211_regdomain {
 	struct rcu_head rcu_head;
 	u32 n_reg_rules;
-	u32 n_wmm_rules;
 	char alpha2[3];
 	enum nl80211_dfs_regions dfs_region;
 	struct ieee80211_reg_rule reg_rules[];
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index d02fbfec3783..c80187d6e6bb 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1120,7 +1120,7 @@ void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata,
 {
 	struct ieee80211_chanctx_conf *chanctx_conf;
 	const struct ieee80211_reg_rule *rrule;
-	struct ieee80211_wmm_ac *wmm_ac;
+	const struct ieee80211_wmm_ac *wmm_ac;
 	u16 center_freq = 0;
 
 	if (sdata->vif.type != NL80211_IFTYPE_AP &&
@@ -1139,15 +1139,15 @@ void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata,
 
 	rrule = freq_reg_info(sdata->wdev.wiphy, MHZ_TO_KHZ(center_freq));
 
-	if (IS_ERR_OR_NULL(rrule) || !rrule->wmm_rule) {
+	if (IS_ERR_OR_NULL(rrule) || !rrule->has_wmm) {
 		rcu_read_unlock();
 		return;
 	}
 
 	if (sdata->vif.type == NL80211_IFTYPE_AP)
-		wmm_ac = &rrule->wmm_rule->ap[ac];
+		wmm_ac = &rrule->wmm_rule.ap[ac];
 	else
-		wmm_ac = &rrule->wmm_rule->client[ac];
+		wmm_ac = &rrule->wmm_rule.client[ac];
 	qparam->cw_min = max_t(u16, qparam->cw_min, wmm_ac->cw_min);
 	qparam->cw_max = max_t(u16, qparam->cw_max, wmm_ac->cw_max);
 	qparam->aifs = max_t(u8, qparam->aifs, wmm_ac->aifsn);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 80bc986c79e5..e3dcffd96919 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -667,13 +667,13 @@ static int nl80211_msg_put_wmm_rules(struct sk_buff *msg,
 			goto nla_put_failure;
 
 		if (nla_put_u16(msg, NL80211_WMMR_CW_MIN,
-				rule->wmm_rule->client[j].cw_min) ||
+				rule->wmm_rule.client[j].cw_min) ||
 		    nla_put_u16(msg, NL80211_WMMR_CW_MAX,
-				rule->wmm_rule->client[j].cw_max) ||
+				rule->wmm_rule.client[j].cw_max) ||
 		    nla_put_u8(msg, NL80211_WMMR_AIFSN,
-			       rule->wmm_rule->client[j].aifsn) ||
+			       rule->wmm_rule.client[j].aifsn) ||
 		    nla_put_u8(msg, NL80211_WMMR_TXOP,
-			       rule->wmm_rule->client[j].cot))
+			       rule->wmm_rule.client[j].cot))
 			goto nla_put_failure;
 
 		nla_nest_end(msg, nl_wmm_rule);
@@ -766,7 +766,7 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, struct wiphy *wiphy,
 		const struct ieee80211_reg_rule *rule =
 			freq_reg_info(wiphy, chan->center_freq);
 
-		if (!IS_ERR(rule) && rule->wmm_rule) {
+		if (!IS_ERR_OR_NULL(rule) && rule->has_wmm) {
 			if (nl80211_msg_put_wmm_rules(msg, rule))
 				goto nla_put_failure;
 		}
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 283902974fbf..2f702adf2912 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -425,35 +425,23 @@ static const struct ieee80211_regdomain *
 reg_copy_regd(const struct ieee80211_regdomain *src_regd)
 {
 	struct ieee80211_regdomain *regd;
-	int size_of_regd, size_of_wmms;
+	int size_of_regd;
 	unsigned int i;
-	struct ieee80211_wmm_rule *d_wmm, *s_wmm;
 
 	size_of_regd =
 		sizeof(struct ieee80211_regdomain) +
 		src_regd->n_reg_rules * sizeof(struct ieee80211_reg_rule);
-	size_of_wmms = src_regd->n_wmm_rules *
-		sizeof(struct ieee80211_wmm_rule);
 
-	regd = kzalloc(size_of_regd + size_of_wmms, GFP_KERNEL);
+	regd = kzalloc(size_of_regd, GFP_KERNEL);
 	if (!regd)
 		return ERR_PTR(-ENOMEM);
 
 	memcpy(regd, src_regd, sizeof(struct ieee80211_regdomain));
 
-	d_wmm = (struct ieee80211_wmm_rule *)((u8 *)regd + size_of_regd);
-	s_wmm = (struct ieee80211_wmm_rule *)((u8 *)src_regd + size_of_regd);
-	memcpy(d_wmm, s_wmm, size_of_wmms);
-
-	for (i = 0; i < src_regd->n_reg_rules; i++) {
+	for (i = 0; i < src_regd->n_reg_rules; i++)
 		memcpy(&regd->reg_rules[i], &src_regd->reg_rules[i],
 		       sizeof(struct ieee80211_reg_rule));
-		if (!src_regd->reg_rules[i].wmm_rule)
-			continue;
 
-		regd->reg_rules[i].wmm_rule = d_wmm +
-			(src_regd->reg_rules[i].wmm_rule - s_wmm);
-	}
 	return regd;
 }
 
@@ -859,9 +847,10 @@ static bool valid_regdb(const u8 *data, unsigned int size)
 	return true;
 }
 
-static void set_wmm_rule(struct ieee80211_wmm_rule *rule,
+static void set_wmm_rule(struct ieee80211_reg_rule *rrule,
 			 struct fwdb_wmm_rule *wmm)
 {
+	struct ieee80211_wmm_rule *rule = &rrule->wmm_rule;
 	unsigned int i;
 
 	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
@@ -875,11 +864,13 @@ static void set_wmm_rule(struct ieee80211_wmm_rule *rule,
 		rule->ap[i].aifsn = wmm->ap[i].aifsn;
 		rule->ap[i].cot = 1000 * be16_to_cpu(wmm->ap[i].cot);
 	}
+
+	rrule->has_wmm = true;
 }
 
 static int __regdb_query_wmm(const struct fwdb_header *db,
 			     const struct fwdb_country *country, int freq,
-			     u32 *dbptr, struct ieee80211_wmm_rule *rule)
+			     struct ieee80211_reg_rule *rule)
 {
 	unsigned int ptr = be16_to_cpu(country->coll_ptr) << 2;
 	struct fwdb_collection *coll = (void *)((u8 *)db + ptr);
@@ -900,8 +891,6 @@ static int __regdb_query_wmm(const struct fwdb_header *db,
 			wmm_ptr = be16_to_cpu(rrule->wmm_ptr) << 2;
 			wmm = (void *)((u8 *)db + wmm_ptr);
 			set_wmm_rule(rule, wmm);
-			if (dbptr)
-				*dbptr = wmm_ptr;
 			return 0;
 		}
 	}
@@ -909,8 +898,7 @@ static int __regdb_query_wmm(const struct fwdb_header *db,
 	return -ENODATA;
 }
 
-int reg_query_regdb_wmm(char *alpha2, int freq, u32 *dbptr,
-			struct ieee80211_wmm_rule *rule)
+int reg_query_regdb_wmm(char *alpha2, int freq, struct ieee80211_reg_rule *rule)
 {
 	const struct fwdb_header *hdr = regdb;
 	const struct fwdb_country *country;
@@ -924,8 +912,7 @@ int reg_query_regdb_wmm(char *alpha2, int freq, u32 *dbptr,
 	country = &hdr->country[0];
 	while (country->coll_ptr) {
 		if (alpha2_equal(alpha2, country->alpha2))
-			return __regdb_query_wmm(regdb, country, freq, dbptr,
-						 rule);
+			return __regdb_query_wmm(regdb, country, freq, rule);
 
 		country++;
 	}
@@ -934,32 +921,13 @@ int reg_query_regdb_wmm(char *alpha2, int freq, u32 *dbptr,
 }
 EXPORT_SYMBOL(reg_query_regdb_wmm);
 
-struct wmm_ptrs {
-	struct ieee80211_wmm_rule *rule;
-	u32 ptr;
-};
-
-static struct ieee80211_wmm_rule *find_wmm_ptr(struct wmm_ptrs *wmm_ptrs,
-					       u32 wmm_ptr, int n_wmms)
-{
-	int i;
-
-	for (i = 0; i < n_wmms; i++) {
-		if (wmm_ptrs[i].ptr == wmm_ptr)
-			return wmm_ptrs[i].rule;
-	}
-	return NULL;
-}
-
 static int regdb_query_country(const struct fwdb_header *db,
 			       const struct fwdb_country *country)
 {
 	unsigned int ptr = be16_to_cpu(country->coll_ptr) << 2;
 	struct fwdb_collection *coll = (void *)((u8 *)db + ptr);
 	struct ieee80211_regdomain *regdom;
-	struct ieee80211_regdomain *tmp_rd;
-	unsigned int size_of_regd, i, n_wmms = 0;
-	struct wmm_ptrs *wmm_ptrs;
+	unsigned int size_of_regd, i;
 
 	size_of_regd = sizeof(struct ieee80211_regdomain) +
 		coll->n_rules * sizeof(struct ieee80211_reg_rule);
@@ -968,12 +936,6 @@ static int regdb_query_country(const struct fwdb_header *db,
 	if (!regdom)
 		return -ENOMEM;
 
-	wmm_ptrs = kcalloc(coll->n_rules, sizeof(*wmm_ptrs), GFP_KERNEL);
-	if (!wmm_ptrs) {
-		kfree(regdom);
-		return -ENOMEM;
-	}
-
 	regdom->n_reg_rules = coll->n_rules;
 	regdom->alpha2[0] = country->alpha2[0];
 	regdom->alpha2[1] = country->alpha2[1];
@@ -1012,37 +974,11 @@ static int regdb_query_country(const struct fwdb_header *db,
 				1000 * be16_to_cpu(rule->cac_timeout);
 		if (rule->len >= offsetofend(struct fwdb_rule, wmm_ptr)) {
 			u32 wmm_ptr = be16_to_cpu(rule->wmm_ptr) << 2;
-			struct ieee80211_wmm_rule *wmm_pos =
-				find_wmm_ptr(wmm_ptrs, wmm_ptr, n_wmms);
-			struct fwdb_wmm_rule *wmm;
-			struct ieee80211_wmm_rule *wmm_rule;
-
-			if (wmm_pos) {
-				rrule->wmm_rule = wmm_pos;
-				continue;
-			}
-			wmm = (void *)((u8 *)db + wmm_ptr);
-			tmp_rd = krealloc(regdom, size_of_regd + (n_wmms + 1) *
-					  sizeof(struct ieee80211_wmm_rule),
-					  GFP_KERNEL);
-
-			if (!tmp_rd) {
-				kfree(regdom);
-				kfree(wmm_ptrs);
-				return -ENOMEM;
-			}
-			regdom = tmp_rd;
-
-			wmm_rule = (struct ieee80211_wmm_rule *)
-				((u8 *)regdom + size_of_regd + n_wmms *
-				sizeof(struct ieee80211_wmm_rule));
+			struct fwdb_wmm_rule *wmm = (void *)((u8 *)db + wmm_ptr);
 
-			set_wmm_rule(wmm_rule, wmm);
-			wmm_ptrs[n_wmms].ptr = wmm_ptr;
-			wmm_ptrs[n_wmms++].rule = wmm_rule;
+			set_wmm_rule(rrule, wmm);
 		}
 	}
-	kfree(wmm_ptrs);
 
 	return reg_schedule_apply(regdom);
 }
-- 
cgit v1.2.3-59-g8ed1b


From 20932750d9c78d307e4f2f18f9c6a32b82b1e0e8 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Mon, 20 Aug 2018 13:56:07 +0300
Subject: mac80211: don't update the PM state of a peer upon a multicast frame

I changed the way mac80211 updates the PM state of the peer.
I forgot that we could also have multicast frames from the
peer and that those frame should of course not change the
PM state of the peer: A peer goes to power save when it
needs to scan, but it won't send the broadcast Probe Request
with the PM bit set.

This made us mark the peer as awake when it wasn't and then
Intel's firmware would fail to transmit because the peer is
asleep according to its database. The driver warned about
this and it looked like this:

 WARNING: CPU: 0 PID: 184 at /usr/src/linux-4.16.14/drivers/net/wireless/intel/iwlwifi/mvm/tx.c:1369 iwl_mvm_rx_tx_cmd+0x53b/0x860
 CPU: 0 PID: 184 Comm: irq/124-iwlwifi Not tainted 4.16.14 #1
 RIP: 0010:iwl_mvm_rx_tx_cmd+0x53b/0x860
 Call Trace:
  iwl_pcie_rx_handle+0x220/0x880
  iwl_pcie_irq_handler+0x6c9/0xa20
  ? irq_forced_thread_fn+0x60/0x60
  ? irq_thread_dtor+0x90/0x90

The relevant code that spits the WARNING is:

        case TX_STATUS_FAIL_DEST_PS:
                /* the FW should have stopped the queue and not
                 * return this status
                 */
                WARN_ON(1);
                info->flags |= IEEE80211_TX_STAT_TX_FILTERED;

This fixes https://bugzilla.kernel.org/show_bug.cgi?id=199967.

Fixes: 9fef65443388 ("mac80211: always update the PM state of a peer on MGMT / DATA frames")
Cc: <stable@vger.kernel.org>   #4.16+
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/rx.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 932985ca4e66..3f80a5ca4050 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1612,6 +1612,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
 	 */
 	if (!ieee80211_hw_check(&sta->local->hw, AP_LINK_PS) &&
 	    !ieee80211_has_morefrags(hdr->frame_control) &&
+	    !is_multicast_ether_addr(hdr->addr1) &&
 	    (ieee80211_is_mgmt(hdr->frame_control) ||
 	     ieee80211_is_data(hdr->frame_control)) &&
 	    !(status->rx_flags & IEEE80211_RX_DEFERRED_RELEASE) &&
-- 
cgit v1.2.3-59-g8ed1b


From d3c89bbc7491d5e288ca2993e999d24ba9ff52ad Mon Sep 17 00:00:00 2001
From: Haim Dreyfuss <haim.dreyfuss@intel.com>
Date: Tue, 21 Aug 2018 09:22:19 +0300
Subject: nl80211: Fix nla_put_u8 to u16 for NL80211_WMMR_TXOP

TXOP (also known as Channel Occupancy Time) is u16 and should be
added using nla_put_u16 instead of u8, fix that.

Fixes: 50f32718e125 ("nl80211: Add wmm rule attribute to NL80211_CMD_GET_WIPHY dump command")
Signed-off-by: Haim Dreyfuss <haim.dreyfuss@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/nl80211.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index e3dcffd96919..3f7ffbe6c634 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -672,8 +672,8 @@ static int nl80211_msg_put_wmm_rules(struct sk_buff *msg,
 				rule->wmm_rule.client[j].cw_max) ||
 		    nla_put_u8(msg, NL80211_WMMR_AIFSN,
 			       rule->wmm_rule.client[j].aifsn) ||
-		    nla_put_u8(msg, NL80211_WMMR_TXOP,
-			       rule->wmm_rule.client[j].cot))
+		    nla_put_u16(msg, NL80211_WMMR_TXOP,
+			        rule->wmm_rule.client[j].cot))
 			goto nla_put_failure;
 
 		nla_nest_end(msg, nl_wmm_rule);
-- 
cgit v1.2.3-59-g8ed1b


From b88d26d97c41680f7327e5fb8061ad0037877f40 Mon Sep 17 00:00:00 2001
From: Haim Dreyfuss <haim.dreyfuss@intel.com>
Date: Tue, 21 Aug 2018 09:22:20 +0300
Subject: nl80211: Pass center frequency in kHz instead of MHz

freq_reg_info expects to get the frequency in kHz. Instead we
accidently pass it in MHz.  Thus, currently the function always
return ERR rule. Fix that.

Fixes: 50f32718e125 ("nl80211: Add wmm rule attribute to NL80211_CMD_GET_WIPHY dump command")
Signed-off-by: Haim Dreyfuss <haim.dreyfuss@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
[fix kHz/MHz in commit message]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/nl80211.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 3f7ffbe6c634..ce0149a86c13 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -764,7 +764,7 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, struct wiphy *wiphy,
 
 	if (large) {
 		const struct ieee80211_reg_rule *rule =
-			freq_reg_info(wiphy, chan->center_freq);
+			freq_reg_info(wiphy, MHZ_TO_KHZ(chan->center_freq));
 
 		if (!IS_ERR_OR_NULL(rule) && rule->has_wmm) {
 			if (nl80211_msg_put_wmm_rules(msg, rule))
-- 
cgit v1.2.3-59-g8ed1b


From 5b24109b0563d45094c470684c1f8cea1af269f8 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 28 Aug 2018 16:15:35 +0200
Subject: bpf: fix several offset tests in bpf_msg_pull_data

While recently going over bpf_msg_pull_data(), I noticed three
issues which are fixed in here:

1) When we attempt to find the first scatterlist element (sge)
   for the start offset, we add len to the offset before we check
   for start < offset + len, whereas it should come after when
   we iterate to the next sge to accumulate the offsets. For
   example, given a start offset of 12 with a sge length of 8
   for the first sge in the list would lead us to determine this
   sge as the first sge thinking it covers first 16 bytes where
   start is located, whereas start sits in subsequent sges so
   we would end up pulling in the wrong data.

2) After figuring out the starting sge, we have a short-cut test
   in !msg->sg_copy[i] && bytes <= len. This checks whether it's
   not needed to make the page at the sge private where we can
   just exit by updating msg->data and msg->data_end. However,
   the length test is not fully correct. bytes <= len checks
   whether the requested bytes (end - start offsets) fit into the
   sge's length. The part that is missing is that start must not
   be sge length aligned. Meaning, the start offset into the sge
   needs to be accounted as well on top of the requested bytes
   as otherwise we can access the sge out of bounds. For example
   the sge could have length of 8, our requested bytes could have
   length of 8, but at a start offset of 4, so we also would need
   to pull in 4 bytes of the next sge, when we jump to the out
   label we do set msg->data to sg_virt(&sg[i]) + start - offset
   and msg->data_end to msg->data + bytes which would be oob.

3) The subsequent bytes < copy test for finding the last sge has
   the same issue as in point 2) but also it tests for less than
   rather than less or equal to. Meaning if the sge length is of
   8 and requested bytes of 8 while having the start aligned with
   the sge, we would unnecessarily go and pull in the next sge as
   well to make it private.

Fixes: 015632bb30da ("bpf: sk_msg program helper bpf_sk_msg_pull_data")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 net/core/filter.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/core/filter.c b/net/core/filter.c
index 7a2430945c71..ec4d67c0cf0c 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2286,10 +2286,10 @@ BPF_CALL_4(bpf_msg_pull_data,
 	   struct sk_msg_buff *, msg, u32, start, u32, end, u64, flags)
 {
 	unsigned int len = 0, offset = 0, copy = 0;
+	int bytes = end - start, bytes_sg_total;
 	struct scatterlist *sg = msg->sg_data;
 	int first_sg, last_sg, i, shift;
 	unsigned char *p, *to, *from;
-	int bytes = end - start;
 	struct page *page;
 
 	if (unlikely(flags || end <= start))
@@ -2299,9 +2299,9 @@ BPF_CALL_4(bpf_msg_pull_data,
 	i = msg->sg_start;
 	do {
 		len = sg[i].length;
-		offset += len;
 		if (start < offset + len)
 			break;
+		offset += len;
 		i++;
 		if (i == MAX_SKB_FRAGS)
 			i = 0;
@@ -2310,7 +2310,11 @@ BPF_CALL_4(bpf_msg_pull_data,
 	if (unlikely(start >= offset + len))
 		return -EINVAL;
 
-	if (!msg->sg_copy[i] && bytes <= len)
+	/* The start may point into the sg element so we need to also
+	 * account for the headroom.
+	 */
+	bytes_sg_total = start - offset + bytes;
+	if (!msg->sg_copy[i] && bytes_sg_total <= len)
 		goto out;
 
 	first_sg = i;
@@ -2330,12 +2334,12 @@ BPF_CALL_4(bpf_msg_pull_data,
 		i++;
 		if (i == MAX_SKB_FRAGS)
 			i = 0;
-		if (bytes < copy)
+		if (bytes_sg_total <= copy)
 			break;
 	} while (i != msg->sg_end);
 	last_sg = i;
 
-	if (unlikely(copy < end - start))
+	if (unlikely(bytes_sg_total > copy))
 		return -EINVAL;
 
 	page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC, get_order(copy));
-- 
cgit v1.2.3-59-g8ed1b


From 1f631c3201fe5491808df143d8fcba81b3197ffd Mon Sep 17 00:00:00 2001
From: Yuan-Chi Pang <fu3mo6goo@gmail.com>
Date: Wed, 29 Aug 2018 09:30:08 +0800
Subject: mac80211: mesh: fix HWMP sequence numbering to follow standard

IEEE 802.11-2016 14.10.8.3 HWMP sequence numbering says:
If it is a target mesh STA, it shall update its own HWMP SN to
maximum (current HWMP SN, target HWMP SN in the PREQ element) + 1
immediately before it generates a PREP element in response to a
PREQ element.

Signed-off-by: Yuan-Chi Pang <fu3mo6goo@gmail.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/mesh_hwmp.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 35ad3983ae4b..daf9db3c8f24 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -572,6 +572,10 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata,
 		forward = false;
 		reply = true;
 		target_metric = 0;
+
+		if (SN_GT(target_sn, ifmsh->sn))
+			ifmsh->sn = target_sn;
+
 		if (time_after(jiffies, ifmsh->last_sn_update +
 					net_traversal_jiffies(sdata)) ||
 		    time_before(jiffies, ifmsh->last_sn_update)) {
-- 
cgit v1.2.3-59-g8ed1b


From 166ac9d55b0ab70b644e429be1f217fe8393cbd7 Mon Sep 17 00:00:00 2001
From: Sara Sharon <sara.sharon@intel.com>
Date: Wed, 29 Aug 2018 08:57:02 +0200
Subject: mac80211: avoid kernel panic when building AMSDU from non-linear SKB

When building building AMSDU from non-linear SKB, we hit a
kernel panic when trying to push the padding to the tail.
Instead, put the padding at the head of the next subframe.
This also fixes the A-MSDU subframes to not have the padding
accounted in the length field and not have pad at all for
the last subframe, both required by the spec.

Fixes: 6e0456b54545 ("mac80211: add A-MSDU tx support")
Signed-off-by: Sara Sharon <sara.sharon@intel.com>
Reviewed-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/tx.c | 38 +++++++++++++++++++++-----------------
 1 file changed, 21 insertions(+), 17 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index fa1f1e63a264..667a73d6eb5c 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -3073,27 +3073,18 @@ void ieee80211_clear_fast_xmit(struct sta_info *sta)
 }
 
 static bool ieee80211_amsdu_realloc_pad(struct ieee80211_local *local,
-					struct sk_buff *skb, int headroom,
-					int *subframe_len)
+					struct sk_buff *skb, int headroom)
 {
-	int amsdu_len = *subframe_len + sizeof(struct ethhdr);
-	int padding = (4 - amsdu_len) & 3;
-
-	if (skb_headroom(skb) < headroom || skb_tailroom(skb) < padding) {
+	if (skb_headroom(skb) < headroom) {
 		I802_DEBUG_INC(local->tx_expand_skb_head);
 
-		if (pskb_expand_head(skb, headroom, padding, GFP_ATOMIC)) {
+		if (pskb_expand_head(skb, headroom, 0, GFP_ATOMIC)) {
 			wiphy_debug(local->hw.wiphy,
 				    "failed to reallocate TX buffer\n");
 			return false;
 		}
 	}
 
-	if (padding) {
-		*subframe_len += padding;
-		skb_put_zero(skb, padding);
-	}
-
 	return true;
 }
 
@@ -3117,8 +3108,7 @@ static bool ieee80211_amsdu_prepare_head(struct ieee80211_sub_if_data *sdata,
 	if (info->control.flags & IEEE80211_TX_CTRL_AMSDU)
 		return true;
 
-	if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(*amsdu_hdr),
-					 &subframe_len))
+	if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(*amsdu_hdr)))
 		return false;
 
 	data = skb_push(skb, sizeof(*amsdu_hdr));
@@ -3184,7 +3174,8 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
 	void *data;
 	bool ret = false;
 	unsigned int orig_len;
-	int n = 1, nfrags;
+	int n = 1, nfrags, pad = 0;
+	u16 hdrlen;
 
 	if (!ieee80211_hw_check(&local->hw, TX_AMSDU))
 		return false;
@@ -3235,8 +3226,19 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
 	if (max_frags && nfrags > max_frags)
 		goto out;
 
-	if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(rfc1042_header) + 2,
-					 &subframe_len))
+	/*
+	 * Pad out the previous subframe to a multiple of 4 by adding the
+	 * padding to the next one, that's being added. Note that head->len
+	 * is the length of the full A-MSDU, but that works since each time
+	 * we add a new subframe we pad out the previous one to a multiple
+	 * of 4 and thus it no longer matters in the next round.
+	 */
+	hdrlen = fast_tx->hdr_len - sizeof(rfc1042_header);
+	if ((head->len - hdrlen) & 3)
+		pad = 4 - ((head->len - hdrlen) & 3);
+
+	if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(rfc1042_header) +
+						     2 + pad))
 		goto out;
 
 	ret = true;
@@ -3248,6 +3250,8 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
 	memcpy(data, &len, 2);
 	memcpy(data + 2, rfc1042_header, sizeof(rfc1042_header));
 
+	memset(skb_push(skb, pad), 0, pad);
+
 	head->len += skb->len;
 	head->data_len += skb->len;
 	*frag_tail = skb;
-- 
cgit v1.2.3-59-g8ed1b


From 0e06b227c5221dd51b5569de93f3b9f532be4a32 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 29 Aug 2018 16:50:34 +0200
Subject: bpf: fix msg->data/data_end after sg shift repair in
 bpf_msg_pull_data

In the current code, msg->data is set as sg_virt(&sg[i]) + start - offset
and msg->data_end relative to it as msg->data + bytes. Using iterator i
to point to the updated starting scatterlist element holds true for some
cases, however not for all where we'd end up pointing out of bounds. It
is /correct/ for these ones:

1) When first finding the starting scatterlist element (sge) where we
   find that the page is already privately owned by the msg and where
   the requested bytes and headroom fit into the sge's length.

However, it's /incorrect/ for the following ones:

2) After we made the requested area private and updated the newly allocated
   page into first_sg slot of the scatterlist ring; when we find that no
   shift repair of the ring is needed where we bail out updating msg->data
   and msg->data_end. At that point i will point to last_sg, which in this
   case is the next elem of first_sg in the ring. The sge at that point
   might as well be invalid (e.g. i == msg->sg_end), which we use for
   setting the range of sg_virt(&sg[i]). The correct one would have been
   first_sg.

3) Similar as in 2) but when we find that a shift repair of the ring is
   needed. In this case we fix up all sges and stop once we've reached the
   end. In this case i will point to will point to the new msg->sg_end,
   and the sge at that point will be invalid. Again here the requested
   range sits in first_sg.

Fixes: 015632bb30da ("bpf: sk_msg program helper bpf_sk_msg_pull_data")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 net/core/filter.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/core/filter.c b/net/core/filter.c
index ec4d67c0cf0c..b9225c55926a 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2310,6 +2310,7 @@ BPF_CALL_4(bpf_msg_pull_data,
 	if (unlikely(start >= offset + len))
 		return -EINVAL;
 
+	first_sg = i;
 	/* The start may point into the sg element so we need to also
 	 * account for the headroom.
 	 */
@@ -2317,8 +2318,6 @@ BPF_CALL_4(bpf_msg_pull_data,
 	if (!msg->sg_copy[i] && bytes_sg_total <= len)
 		goto out;
 
-	first_sg = i;
-
 	/* At this point we need to linearize multiple scatterlist
 	 * elements or a single shared page. Either way we need to
 	 * copy into a linear buffer exclusively owned by BPF. Then
@@ -2400,7 +2399,7 @@ BPF_CALL_4(bpf_msg_pull_data,
 	if (msg->sg_end < 0)
 		msg->sg_end += MAX_SKB_FRAGS;
 out:
-	msg->data = sg_virt(&sg[i]) + start - offset;
+	msg->data = sg_virt(&sg[first_sg]) + start - offset;
 	msg->data_end = msg->data + bytes;
 
 	return 0;
-- 
cgit v1.2.3-59-g8ed1b


From 2e43f95dd8ee62bc8bf57f2afac37fbd70c8d565 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 29 Aug 2018 16:50:35 +0200
Subject: bpf: fix shift upon scatterlist ring wrap-around in bpf_msg_pull_data

If first_sg and last_sg wraps around in the scatterlist ring, then we
need to account for that in the shift as well. E.g. crafting such msgs
where this is the case leads to a hang as shift becomes negative. E.g.
consider the following scenario:

  first_sg := 14     |=>    shift := -12     msg->sg_start := 10
  last_sg  :=  3     |                       msg->sg_end   :=  5

round  1:  i := 15, move_from :=   3, sg[15] := sg[  3]
round  2:  i :=  0, move_from := -12, sg[ 0] := sg[-12]
round  3:  i :=  1, move_from := -11, sg[ 1] := sg[-11]
round  4:  i :=  2, move_from := -10, sg[ 2] := sg[-10]
[...]
round 13:  i := 11, move_from :=  -1, sg[ 2] := sg[ -1]
round 14:  i := 12, move_from :=   0, sg[ 2] := sg[  0]
round 15:  i := 13, move_from :=   1, sg[ 2] := sg[  1]
round 16:  i := 14, move_from :=   2, sg[ 2] := sg[  2]
round 17:  i := 15, move_from :=   3, sg[ 2] := sg[  3]
[...]

This means we will loop forever and never hit the msg->sg_end condition
to break out of the loop. When we see that the ring wraps around, then
the shift should be MAX_SKB_FRAGS - first_sg + last_sg - 1. Meaning,
the remainder slots from the tail of the ring and the head until last_sg
combined.

Fixes: 015632bb30da ("bpf: sk_msg program helper bpf_sk_msg_pull_data")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 net/core/filter.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/filter.c b/net/core/filter.c
index b9225c55926a..43ba5f8c38ca 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2370,7 +2370,10 @@ BPF_CALL_4(bpf_msg_pull_data,
 	 * had a single entry though we can just replace it and
 	 * be done. Otherwise walk the ring and shift the entries.
 	 */
-	shift = last_sg - first_sg - 1;
+	WARN_ON_ONCE(last_sg == first_sg);
+	shift = last_sg > first_sg ?
+		last_sg - first_sg - 1 :
+		MAX_SKB_FRAGS - first_sg + last_sg - 1;
 	if (!shift)
 		goto out;
 
-- 
cgit v1.2.3-59-g8ed1b


From a8cf76a9023bc6709b1361d06bb2fae5227b9d68 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 29 Aug 2018 16:50:36 +0200
Subject: bpf: fix sg shift repair start offset in bpf_msg_pull_data

When we perform the sg shift repair for the scatterlist ring, we
currently start out at i = first_sg + 1. However, this is not
correct since the first_sg could point to the sge sitting at slot
MAX_SKB_FRAGS - 1, and a subsequent i = MAX_SKB_FRAGS will access
the scatterlist ring (sg) out of bounds. Add the sk_msg_iter_var()
helper for iterating through the ring, and apply the same rule
for advancing to the next ring element as we do elsewhere. Later
work will use this helper also in other places.

Fixes: 015632bb30da ("bpf: sk_msg program helper bpf_sk_msg_pull_data")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 net/core/filter.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/core/filter.c b/net/core/filter.c
index 43ba5f8c38ca..2c7801f6737a 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2282,6 +2282,13 @@ static const struct bpf_func_proto bpf_msg_cork_bytes_proto = {
 	.arg2_type      = ARG_ANYTHING,
 };
 
+#define sk_msg_iter_var(var)			\
+	do {					\
+		var++;				\
+		if (var == MAX_SKB_FRAGS)	\
+			var = 0;		\
+	} while (0)
+
 BPF_CALL_4(bpf_msg_pull_data,
 	   struct sk_msg_buff *, msg, u32, start, u32, end, u64, flags)
 {
@@ -2302,9 +2309,7 @@ BPF_CALL_4(bpf_msg_pull_data,
 		if (start < offset + len)
 			break;
 		offset += len;
-		i++;
-		if (i == MAX_SKB_FRAGS)
-			i = 0;
+		sk_msg_iter_var(i);
 	} while (i != msg->sg_end);
 
 	if (unlikely(start >= offset + len))
@@ -2330,9 +2335,7 @@ BPF_CALL_4(bpf_msg_pull_data,
 	 */
 	do {
 		copy += sg[i].length;
-		i++;
-		if (i == MAX_SKB_FRAGS)
-			i = 0;
+		sk_msg_iter_var(i);
 		if (bytes_sg_total <= copy)
 			break;
 	} while (i != msg->sg_end);
@@ -2358,9 +2361,7 @@ BPF_CALL_4(bpf_msg_pull_data,
 		sg[i].length = 0;
 		put_page(sg_page(&sg[i]));
 
-		i++;
-		if (i == MAX_SKB_FRAGS)
-			i = 0;
+		sk_msg_iter_var(i);
 	} while (i != last_sg);
 
 	sg[first_sg].length = copy;
@@ -2377,7 +2378,8 @@ BPF_CALL_4(bpf_msg_pull_data,
 	if (!shift)
 		goto out;
 
-	i = first_sg + 1;
+	i = first_sg;
+	sk_msg_iter_var(i);
 	do {
 		int move_from;
 
@@ -2394,9 +2396,7 @@ BPF_CALL_4(bpf_msg_pull_data,
 		sg[move_from].page_link = 0;
 		sg[move_from].offset = 0;
 
-		i++;
-		if (i == MAX_SKB_FRAGS)
-			i = 0;
+		sk_msg_iter_var(i);
 	} while (1);
 	msg->sg_end -= shift;
 	if (msg->sg_end < 0)
-- 
cgit v1.2.3-59-g8ed1b


From 9f2895461439fda2801a7906fb4c5fb3dbb37a0a Mon Sep 17 00:00:00 2001
From: Alexey Kodanev <alexey.kodanev@oracle.com>
Date: Thu, 23 Aug 2018 19:49:54 +0300
Subject: vti6: remove !skb->ignore_df check from vti6_xmit()

Before the commit d6990976af7c ("vti6: fix PMTU caching and reporting
on xmit") '!skb->ignore_df' check was always true because the function
skb_scrub_packet() was called before it, resetting ignore_df to zero.

In the commit, skb_scrub_packet() was moved below, and now this check
can be false for the packet, e.g. when sending it in the two fragments,
this prevents successful PMTU updates in such case. The next attempts
to send the packet lead to the same tx error. Moreover, vti6 initial
MTU value relies on PMTU adjustments.

This issue can be reproduced with the following LTP test script:
    udp_ipsec_vti.sh -6 -p ah -m tunnel -s 2000

Fixes: ccd740cbc6e0 ("vti6: Add pmtu handling to vti6_xmit.")
Signed-off-by: Alexey Kodanev <alexey.kodanev@oracle.com>
Acked-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_vti.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 5095367c7204..eeaf7455d51e 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -481,7 +481,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
 	}
 
 	mtu = dst_mtu(dst);
-	if (!skb->ignore_df && skb->len > mtu) {
+	if (skb->len > mtu) {
 		skb_dst_update_pmtu(skb, mtu);
 
 		if (skb->protocol == htons(ETH_P_IPV6)) {
-- 
cgit v1.2.3-59-g8ed1b


From bd583fe30427500a2d0abe25724025b1cb5e2636 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Thu, 23 Aug 2018 16:19:44 -0700
Subject: tipc: fix a missing rhashtable_walk_exit()

rhashtable_walk_exit() must be paired with rhashtable_walk_enter().

Fixes: 40f9f4397060 ("tipc: Fix tipc_sk_reinit race conditions")
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/socket.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index c1e93c9515bc..c9a50b62c738 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2672,6 +2672,8 @@ void tipc_sk_reinit(struct net *net)
 
 		rhashtable_walk_stop(&iter);
 	} while (tsk == ERR_PTR(-EAGAIN));
+
+	rhashtable_walk_exit(&iter);
 }
 
 static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid)
-- 
cgit v1.2.3-59-g8ed1b


From 9a07efa9aea2f4a59f35da0785a4e6a6b5a96192 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Fri, 24 Aug 2018 12:28:06 -0700
Subject: tipc: switch to rhashtable iterator

syzbot reported a use-after-free in tipc_group_fill_sock_diag(),
where tipc_group_fill_sock_diag() still reads tsk->group meanwhile
tipc_group_delete() just deletes it in tipc_release().

tipc_nl_sk_walk() aims to lock this sock when walking each sock
in the hash table to close race conditions with sock changes like
this one, by acquiring tsk->sk.sk_lock.slock spinlock, unfortunately
this doesn't work at all. All non-BH call path should take
lock_sock() instead to make it work.

tipc_nl_sk_walk() brutally iterates with raw rht_for_each_entry_rcu()
where RCU read lock is required, this is the reason why lock_sock()
can't be taken on this path. This could be resolved by switching to
rhashtable iterator API's, where taking a sleepable lock is possible.
Also, the iterator API's are friendly for restartable calls like
diag dump, the last position is remembered behind the scence,
all we need to do here is saving the iterator into cb->args[].

I tested this with parallel tipc diag dump and thousands of tipc
socket creation and release, no crash or memory leak.

Reported-by: syzbot+b9c8f3ab2994b7cd1625@syzkaller.appspotmail.com
Cc: Jon Maloy <jon.maloy@ericsson.com>
Cc: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/diag.c    |  2 ++
 net/tipc/netlink.c |  2 ++
 net/tipc/socket.c  | 76 +++++++++++++++++++++++++++++++++++-------------------
 net/tipc/socket.h  |  2 ++
 4 files changed, 56 insertions(+), 26 deletions(-)

(limited to 'net')

diff --git a/net/tipc/diag.c b/net/tipc/diag.c
index aaabb0b776dd..73137f4aeb68 100644
--- a/net/tipc/diag.c
+++ b/net/tipc/diag.c
@@ -84,7 +84,9 @@ static int tipc_sock_diag_handler_dump(struct sk_buff *skb,
 
 	if (h->nlmsg_flags & NLM_F_DUMP) {
 		struct netlink_dump_control c = {
+			.start = tipc_dump_start,
 			.dump = tipc_diag_dump,
+			.done = tipc_dump_done,
 		};
 		netlink_dump_start(net->diag_nlsk, skb, h, &c);
 		return 0;
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index 6ff2254088f6..99ee419210ba 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -167,7 +167,9 @@ static const struct genl_ops tipc_genl_v2_ops[] = {
 	},
 	{
 		.cmd	= TIPC_NL_SOCK_GET,
+		.start = tipc_dump_start,
 		.dumpit	= tipc_nl_sk_dump,
+		.done	= tipc_dump_done,
 		.policy = tipc_nl_policy,
 	},
 	{
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index c9a50b62c738..ab7a2a7178f7 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -3229,45 +3229,69 @@ int tipc_nl_sk_walk(struct sk_buff *skb, struct netlink_callback *cb,
 				       struct netlink_callback *cb,
 				       struct tipc_sock *tsk))
 {
-	struct net *net = sock_net(skb->sk);
-	struct tipc_net *tn = tipc_net(net);
-	const struct bucket_table *tbl;
-	u32 prev_portid = cb->args[1];
-	u32 tbl_id = cb->args[0];
-	struct rhash_head *pos;
+	struct rhashtable_iter *iter = (void *)cb->args[0];
 	struct tipc_sock *tsk;
 	int err;
 
-	rcu_read_lock();
-	tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht);
-	for (; tbl_id < tbl->size; tbl_id++) {
-		rht_for_each_entry_rcu(tsk, pos, tbl, tbl_id, node) {
-			spin_lock_bh(&tsk->sk.sk_lock.slock);
-			if (prev_portid && prev_portid != tsk->portid) {
-				spin_unlock_bh(&tsk->sk.sk_lock.slock);
+	rhashtable_walk_start(iter);
+	while ((tsk = rhashtable_walk_next(iter)) != NULL) {
+		if (IS_ERR(tsk)) {
+			err = PTR_ERR(tsk);
+			if (err == -EAGAIN) {
+				err = 0;
 				continue;
 			}
+			break;
+		}
 
-			err = skb_handler(skb, cb, tsk);
-			if (err) {
-				prev_portid = tsk->portid;
-				spin_unlock_bh(&tsk->sk.sk_lock.slock);
-				goto out;
-			}
-
-			prev_portid = 0;
-			spin_unlock_bh(&tsk->sk.sk_lock.slock);
+		sock_hold(&tsk->sk);
+		rhashtable_walk_stop(iter);
+		lock_sock(&tsk->sk);
+		err = skb_handler(skb, cb, tsk);
+		if (err) {
+			release_sock(&tsk->sk);
+			sock_put(&tsk->sk);
+			goto out;
 		}
+		release_sock(&tsk->sk);
+		rhashtable_walk_start(iter);
+		sock_put(&tsk->sk);
 	}
+	rhashtable_walk_stop(iter);
 out:
-	rcu_read_unlock();
-	cb->args[0] = tbl_id;
-	cb->args[1] = prev_portid;
-
 	return skb->len;
 }
 EXPORT_SYMBOL(tipc_nl_sk_walk);
 
+int tipc_dump_start(struct netlink_callback *cb)
+{
+	struct rhashtable_iter *iter = (void *)cb->args[0];
+	struct net *net = sock_net(cb->skb->sk);
+	struct tipc_net *tn = tipc_net(net);
+
+	if (!iter) {
+		iter = kmalloc(sizeof(*iter), GFP_KERNEL);
+		if (!iter)
+			return -ENOMEM;
+
+		cb->args[0] = (long)iter;
+	}
+
+	rhashtable_walk_enter(&tn->sk_rht, iter);
+	return 0;
+}
+EXPORT_SYMBOL(tipc_dump_start);
+
+int tipc_dump_done(struct netlink_callback *cb)
+{
+	struct rhashtable_iter *hti = (void *)cb->args[0];
+
+	rhashtable_walk_exit(hti);
+	kfree(hti);
+	return 0;
+}
+EXPORT_SYMBOL(tipc_dump_done);
+
 int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct netlink_callback *cb,
 			   struct tipc_sock *tsk, u32 sk_filter_state,
 			   u64 (*tipc_diag_gen_cookie)(struct sock *sk))
diff --git a/net/tipc/socket.h b/net/tipc/socket.h
index aff9b2ae5a1f..d43032e26532 100644
--- a/net/tipc/socket.h
+++ b/net/tipc/socket.h
@@ -68,4 +68,6 @@ int tipc_nl_sk_walk(struct sk_buff *skb, struct netlink_callback *cb,
 		    int (*skb_handler)(struct sk_buff *skb,
 				       struct netlink_callback *cb,
 				       struct tipc_sock *tsk));
+int tipc_dump_start(struct netlink_callback *cb);
+int tipc_dump_done(struct netlink_callback *cb);
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From 85eb9af182243ce9a8b72410d5321c440ac5f8d7 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Mon, 27 Aug 2018 22:56:22 +0200
Subject: net/sched: act_pedit: fix dump of extended layered op

in the (rare) case of failure in nla_nest_start(), missing NULL checks in
tcf_pedit_key_ex_dump() can make the following command

 # tc action add action pedit ex munge ip ttl set 64

dereference a NULL pointer:

 BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
 PGD 800000007d1cd067 P4D 800000007d1cd067 PUD 7acd3067 PMD 0
 Oops: 0002 [#1] SMP PTI
 CPU: 0 PID: 3336 Comm: tc Tainted: G            E     4.18.0.pedit+ #425
 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
 RIP: 0010:tcf_pedit_dump+0x19d/0x358 [act_pedit]
 Code: be 02 00 00 00 48 89 df 66 89 44 24 20 e8 9b b1 fd e0 85 c0 75 46 8b 83 c8 00 00 00 49 83 c5 08 48 03 83 d0 00 00 00 4d 39 f5 <66> 89 04 25 00 00 00 00 0f 84 81 01 00 00 41 8b 45 00 48 8d 4c 24
 RSP: 0018:ffffb5d4004478a8 EFLAGS: 00010246
 RAX: ffff8880fcda2070 RBX: ffff8880fadd2900 RCX: 0000000000000000
 RDX: 0000000000000002 RSI: ffffb5d4004478ca RDI: ffff8880fcda206e
 RBP: ffff8880fb9cb900 R08: 0000000000000008 R09: ffff8880fcda206e
 R10: ffff8880fadd2900 R11: 0000000000000000 R12: ffff8880fd26cf40
 R13: ffff8880fc957430 R14: ffff8880fc957430 R15: ffff8880fb9cb988
 FS:  00007f75a537a740(0000) GS:ffff8880fda00000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 0000000000000000 CR3: 000000007a2fa005 CR4: 00000000001606f0
 Call Trace:
  ? __nla_reserve+0x38/0x50
  tcf_action_dump_1+0xd2/0x130
  tcf_action_dump+0x6a/0xf0
  tca_get_fill.constprop.31+0xa3/0x120
  tcf_action_add+0xd1/0x170
  tc_ctl_action+0x137/0x150
  rtnetlink_rcv_msg+0x263/0x2d0
  ? _cond_resched+0x15/0x40
  ? rtnl_calcit.isra.30+0x110/0x110
  netlink_rcv_skb+0x4d/0x130
  netlink_unicast+0x1a3/0x250
  netlink_sendmsg+0x2ae/0x3a0
  sock_sendmsg+0x36/0x40
  ___sys_sendmsg+0x26f/0x2d0
  ? do_wp_page+0x8e/0x5f0
  ? handle_pte_fault+0x6c3/0xf50
  ? __handle_mm_fault+0x38e/0x520
  ? __sys_sendmsg+0x5e/0xa0
  __sys_sendmsg+0x5e/0xa0
  do_syscall_64+0x5b/0x180
  entry_SYSCALL_64_after_hwframe+0x44/0xa9
 RIP: 0033:0x7f75a4583ba0
 Code: c3 48 8b 05 f2 62 2c 00 f7 db 64 89 18 48 83 cb ff eb dd 0f 1f 80 00 00 00 00 83 3d fd c3 2c 00 00 75 10 b8 2e 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 ae cc 00 00 48 89 04 24
 RSP: 002b:00007fff60ee7418 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
 RAX: ffffffffffffffda RBX: 00007fff60ee7540 RCX: 00007f75a4583ba0
 RDX: 0000000000000000 RSI: 00007fff60ee7490 RDI: 0000000000000003
 RBP: 000000005b842d3e R08: 0000000000000002 R09: 0000000000000000
 R10: 00007fff60ee6ea0 R11: 0000000000000246 R12: 0000000000000000
 R13: 00007fff60ee7554 R14: 0000000000000001 R15: 000000000066c100
 Modules linked in: act_pedit(E) ip6table_filter ip6_tables iptable_filter binfmt_misc crct10dif_pclmul ext4 crc32_pclmul mbcache ghash_clmulni_intel jbd2 pcbc snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_seq snd_seq_device snd_pcm aesni_intel crypto_simd snd_timer cryptd glue_helper snd joydev pcspkr soundcore virtio_balloon i2c_piix4 nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c ata_generic pata_acpi virtio_net net_failover virtio_blk virtio_console failover qxl crc32c_intel drm_kms_helper syscopyarea serio_raw sysfillrect sysimgblt fb_sys_fops ttm drm ata_piix virtio_pci libata virtio_ring i2c_core virtio floppy dm_mirror dm_region_hash dm_log dm_mod [last unloaded: act_pedit]
 CR2: 0000000000000000

Like it's done for other TC actions, give up dumping pedit rules and return
an error if nla_nest_start() returns NULL.

Fixes: 71d0ed7079df ("net/act_pedit: Support using offset relative to the conventional network headers")
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_pedit.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 107034070019..ad99a99f11f6 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -109,16 +109,18 @@ static int tcf_pedit_key_ex_dump(struct sk_buff *skb,
 {
 	struct nlattr *keys_start = nla_nest_start(skb, TCA_PEDIT_KEYS_EX);
 
+	if (!keys_start)
+		goto nla_failure;
 	for (; n > 0; n--) {
 		struct nlattr *key_start;
 
 		key_start = nla_nest_start(skb, TCA_PEDIT_KEY_EX);
+		if (!key_start)
+			goto nla_failure;
 
 		if (nla_put_u16(skb, TCA_PEDIT_KEY_EX_HTYPE, keys_ex->htype) ||
-		    nla_put_u16(skb, TCA_PEDIT_KEY_EX_CMD, keys_ex->cmd)) {
-			nlmsg_trim(skb, keys_start);
-			return -EINVAL;
-		}
+		    nla_put_u16(skb, TCA_PEDIT_KEY_EX_CMD, keys_ex->cmd))
+			goto nla_failure;
 
 		nla_nest_end(skb, key_start);
 
@@ -128,6 +130,9 @@ static int tcf_pedit_key_ex_dump(struct sk_buff *skb,
 	nla_nest_end(skb, keys_start);
 
 	return 0;
+nla_failure:
+	nla_nest_cancel(skb, keys_start);
+	return -EINVAL;
 }
 
 static int tcf_pedit_init(struct net *net, struct nlattr *nla,
@@ -418,7 +423,10 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
 	opt->bindcnt = atomic_read(&p->tcf_bindcnt) - bind;
 
 	if (p->tcfp_keys_ex) {
-		tcf_pedit_key_ex_dump(skb, p->tcfp_keys_ex, p->tcfp_nkeys);
+		if (tcf_pedit_key_ex_dump(skb,
+					  p->tcfp_keys_ex,
+					  p->tcfp_nkeys))
+			goto nla_put_failure;
 
 		if (nla_put(skb, TCA_PEDIT_PARMS_EX, s, opt))
 			goto nla_put_failure;
-- 
cgit v1.2.3-59-g8ed1b


From afe49de44c27a89e8e9631c44b5ffadf6ace65e2 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Tue, 28 Aug 2018 13:40:51 +0200
Subject: ipv6: fix cleanup ordering for ip6_mr failure

Commit 15e668070a64 ("ipv6: reorder icmpv6_init() and ip6_mr_init()")
moved the cleanup label for ipmr_fail, but should have changed the
contents of the cleanup labels as well. Now we can end up cleaning up
icmpv6 even though it hasn't been initialized (jump to icmp_fail or
ipmr_fail).

Simply undo things in the reverse order of their initialization.

Example of panic (triggered by faking a failure of icmpv6_init):

    kasan: GPF could be caused by NULL-ptr deref or user memory access
    general protection fault: 0000 [#1] PREEMPT SMP KASAN PTI
    [...]
    RIP: 0010:__list_del_entry_valid+0x79/0x160
    [...]
    Call Trace:
     ? lock_release+0x8a0/0x8a0
     unregister_pernet_operations+0xd4/0x560
     ? ops_free_list+0x480/0x480
     ? down_write+0x91/0x130
     ? unregister_pernet_subsys+0x15/0x30
     ? down_read+0x1b0/0x1b0
     ? up_read+0x110/0x110
     ? kmem_cache_create_usercopy+0x1b4/0x240
     unregister_pernet_subsys+0x1d/0x30
     icmpv6_cleanup+0x1d/0x30
     inet6_init+0x1b5/0x23f

Fixes: 15e668070a64 ("ipv6: reorder icmpv6_init() and ip6_mr_init()")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/af_inet6.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 673bba31eb18..e5da133c6932 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -1113,11 +1113,11 @@ netfilter_fail:
 igmp_fail:
 	ndisc_cleanup();
 ndisc_fail:
-	ip6_mr_cleanup();
+	icmpv6_cleanup();
 icmp_fail:
-	unregister_pernet_subsys(&inet6_net_ops);
+	ip6_mr_cleanup();
 ipmr_fail:
-	icmpv6_cleanup();
+	unregister_pernet_subsys(&inet6_net_ops);
 register_pernet_fail:
 	sock_unregister(PF_INET6);
 	rtnl_unregister_all(PF_INET6);
-- 
cgit v1.2.3-59-g8ed1b


From a03dc36bdca6b614651fedfcd8559cf914d2d21d Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Tue, 28 Aug 2018 13:40:52 +0200
Subject: ipv6: fix cleanup ordering for pingv6 registration

Commit 6d0bfe226116 ("net: ipv6: Add IPv6 support to the ping socket.")
contains an error in the cleanup path of inet6_init(): when
proto_register(&pingv6_prot, 1) fails, we try to unregister
&pingv6_prot. When rawv6_init() fails, we skip unregistering
&pingv6_prot.

Example of panic (triggered by faking a failure of
 proto_register(&pingv6_prot, 1)):

    general protection fault: 0000 [#1] PREEMPT SMP KASAN PTI
    [...]
    RIP: 0010:__list_del_entry_valid+0x79/0x160
    [...]
    Call Trace:
     proto_unregister+0xbb/0x550
     ? trace_preempt_on+0x6f0/0x6f0
     ? sock_no_shutdown+0x10/0x10
     inet6_init+0x153/0x1b8

Fixes: 6d0bfe226116 ("net: ipv6: Add IPv6 support to the ping socket.")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/af_inet6.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index e5da133c6932..9a4261e50272 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -938,14 +938,14 @@ static int __init inet6_init(void)
 
 	err = proto_register(&pingv6_prot, 1);
 	if (err)
-		goto out_unregister_ping_proto;
+		goto out_unregister_raw_proto;
 
 	/* We MUST register RAW sockets before we create the ICMP6,
 	 * IGMP6, or NDISC control sockets.
 	 */
 	err = rawv6_init();
 	if (err)
-		goto out_unregister_raw_proto;
+		goto out_unregister_ping_proto;
 
 	/* Register the family here so that the init calls below will
 	 * be able to create sockets. (?? is this dangerous ??)
-- 
cgit v1.2.3-59-g8ed1b


From f707ef61e17261f2bb18c3e4871c6f135ab3aba9 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Tue, 28 Aug 2018 13:40:53 +0200
Subject: net: rtnl: return early from rtnl_unregister_all when protocol isn't
 registered

rtnl_unregister_all(PF_INET6) gets called from inet6_init in cases when
no handler has been registered for PF_INET6 yet, for example if
ip6_mr_init() fails. Abort and avoid a NULL pointer deref in that case.

Example of panic (triggered by faking a failure of
 register_pernet_subsys):

    general protection fault: 0000 [#1] PREEMPT SMP KASAN PTI
    [...]
    RIP: 0010:rtnl_unregister_all+0x17e/0x2a0
    [...]
    Call Trace:
     ? rtnetlink_net_init+0x250/0x250
     ? sock_unregister+0x103/0x160
     ? kernel_getsockopt+0x200/0x200
     inet6_init+0x197/0x20d

Fixes: e2fddf5e96df ("[IPV6]: Make af_inet6 to check ip6_route_init return value.")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 24431e578310..60c928894a78 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -324,6 +324,10 @@ void rtnl_unregister_all(int protocol)
 
 	rtnl_lock();
 	tab = rtnl_msg_handlers[protocol];
+	if (!tab) {
+		rtnl_unlock();
+		return;
+	}
 	RCU_INIT_POINTER(rtnl_msg_handlers[protocol], NULL);
 	for (msgindex = 0; msgindex < RTM_NR_MSGTYPES; msgindex++) {
 		link = tab[msgindex];
-- 
cgit v1.2.3-59-g8ed1b


From 97763dc0f4010bc20e2969a6bf9a40a2551c4f79 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Wed, 29 Aug 2018 10:22:33 +0200
Subject: net_sched: reject unknown tcfa_action values

After the commit 802bfb19152c ("net/sched: user-space can't set
unknown tcfa_action values"), unknown tcfa_action values are
converted to TC_ACT_UNSPEC, but the common agreement is instead
rejecting such configurations.

This change also introduces a helper to simplify the destruction
of a single action, avoiding code duplication.

v1 -> v2:
 - helper is now static and renamed according to act_* convention
 - updated extack message, according to the new behavior

Fixes: 802bfb19152c ("net/sched: user-space can't set unknown tcfa_action values")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_api.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index db83dac1e7f4..316c98bb87e4 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -662,6 +662,13 @@ int tcf_action_destroy(struct tc_action *actions[], int bind)
 	return ret;
 }
 
+static int tcf_action_destroy_1(struct tc_action *a, int bind)
+{
+	struct tc_action *actions[] = { a, NULL };
+
+	return tcf_action_destroy(actions, bind);
+}
+
 static int tcf_action_put(struct tc_action *p)
 {
 	return __tcf_action_put(p, false);
@@ -881,17 +888,16 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 	if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN)) {
 		err = tcf_action_goto_chain_init(a, tp);
 		if (err) {
-			struct tc_action *actions[] = { a, NULL };
-
-			tcf_action_destroy(actions, bind);
+			tcf_action_destroy_1(a, bind);
 			NL_SET_ERR_MSG(extack, "Failed to init TC action chain");
 			return ERR_PTR(err);
 		}
 	}
 
 	if (!tcf_action_valid(a->tcfa_action)) {
-		NL_SET_ERR_MSG(extack, "invalid action value, using TC_ACT_UNSPEC instead");
-		a->tcfa_action = TC_ACT_UNSPEC;
+		tcf_action_destroy_1(a, bind);
+		NL_SET_ERR_MSG(extack, "Invalid control action value");
+		return ERR_PTR(-EINVAL);
 	}
 
 	return a;
-- 
cgit v1.2.3-59-g8ed1b


From 4f0223bfe9c3e62d8f45a85f1ef1b18a8a263ef9 Mon Sep 17 00:00:00 2001
From: Arunk Khandavalli <akhandav@codeaurora.org>
Date: Thu, 30 Aug 2018 00:40:16 +0300
Subject: cfg80211: nl80211_update_ft_ies() to validate NL80211_ATTR_IE

nl80211_update_ft_ies() tried to validate NL80211_ATTR_IE with
is_valid_ie_attr() before dereferencing it, but that helper function
returns true in case of NULL pointer (i.e., attribute not included).
This can result to dereferencing a NULL pointer. Fix that by explicitly
checking that NL80211_ATTR_IE is included.

Fixes: 355199e02b83 ("cfg80211: Extend support for IEEE 802.11r Fast BSS Transition")
Signed-off-by: Arunk Khandavalli <akhandav@codeaurora.org>
Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/nl80211.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index ce0149a86c13..733ccf867972 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -12099,6 +12099,7 @@ static int nl80211_update_ft_ies(struct sk_buff *skb, struct genl_info *info)
 		return -EOPNOTSUPP;
 
 	if (!info->attrs[NL80211_ATTR_MDID] ||
+	    !info->attrs[NL80211_ATTR_IE] ||
 	    !is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
 		return -EINVAL;
 
-- 
cgit v1.2.3-59-g8ed1b


From 1eb507903665442360a959136dfa3234c43db085 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Date: Wed, 29 Aug 2018 21:03:25 +0200
Subject: mac80211: do not convert to A-MSDU if frag/subframe limited

Do not start to aggregate packets in a A-MSDU frame (converting the
first subframe to A-MSDU, adding the header) if max_tx_fragments or
max_amsdu_subframes limits are already exceeded by it. In particular,
this happens when drivers set the limit to 1 to avoid A-MSDUs at all.

Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
[reword commit message to be more precise]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/tx.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 667a73d6eb5c..1aac5e3c7eee 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -3208,9 +3208,6 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
 	if (skb->len + head->len > max_amsdu_len)
 		goto out;
 
-	if (!ieee80211_amsdu_prepare_head(sdata, fast_tx, head))
-		goto out;
-
 	nfrags = 1 + skb_shinfo(skb)->nr_frags;
 	nfrags += 1 + skb_shinfo(head)->nr_frags;
 	frag_tail = &skb_shinfo(head)->frag_list;
@@ -3226,6 +3223,9 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
 	if (max_frags && nfrags > max_frags)
 		goto out;
 
+	if (!ieee80211_amsdu_prepare_head(sdata, fast_tx, head))
+		goto out;
+
 	/*
 	 * Pad out the previous subframe to a multiple of 4 by adding the
 	 * padding to the next one, that's being added. Note that head->len
-- 
cgit v1.2.3-59-g8ed1b


From aa58acf325b4aadeecae2bfc90658273b47dbace Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 30 Aug 2018 10:55:49 +0200
Subject: mac80211: always account for A-MSDU header changes

In the error path of changing the SKB headroom of the second
A-MSDU subframe, we would not account for the already-changed
length of the first frame that just got converted to be in
A-MSDU format and thus is a bit longer now.

Fix this by doing the necessary accounting.

It would be possible to reorder the operations, but that would
make the code more complex (to calculate the necessary pad),
and the headroom expansion should not fail frequently enough
to make that worthwhile.

Fixes: 6e0456b54545 ("mac80211: add A-MSDU tx support")
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Acked-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/tx.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 1aac5e3c7eee..6ca0865de945 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -3239,7 +3239,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
 
 	if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(rfc1042_header) +
 						     2 + pad))
-		goto out;
+		goto out_recalc;
 
 	ret = true;
 	data = skb_push(skb, ETH_ALEN + 2);
@@ -3256,11 +3256,13 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
 	head->data_len += skb->len;
 	*frag_tail = skb;
 
-	flow->backlog += head->len - orig_len;
-	tin->backlog_bytes += head->len - orig_len;
-
-	fq_recalc_backlog(fq, tin, flow);
+out_recalc:
+	if (head->len != orig_len) {
+		flow->backlog += head->len - orig_len;
+		tin->backlog_bytes += head->len - orig_len;
 
+		fq_recalc_backlog(fq, tin, flow);
+	}
 out:
 	spin_unlock_bh(&fq->lock);
 
-- 
cgit v1.2.3-59-g8ed1b


From 13ba17bee18e321b073b49a88dcab10881f757da Mon Sep 17 00:00:00 2001
From: Mukesh Ojha <mojha@codeaurora.org>
Date: Fri, 24 Aug 2018 18:03:53 +0530
Subject: notifier: Remove notifier header file wherever not used

The conversion of the hotplug notifiers to a state machine left the
notifier.h includes around in some places. Remove them.

Signed-off-by: Mukesh Ojha <mojha@codeaurora.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lkml.kernel.org/r/1535114033-4605-1-git-send-email-mojha@codeaurora.org
---
 fs/buffer.c            | 1 -
 kernel/printk/printk.c | 1 -
 lib/percpu_counter.c   | 1 -
 mm/page-writeback.c    | 1 -
 mm/page_alloc.c        | 1 -
 mm/slub.c              | 1 -
 net/core/dev.c         | 1 -
 7 files changed, 7 deletions(-)

(limited to 'net')

diff --git a/fs/buffer.c b/fs/buffer.c
index 4cc679d5bf58..6f1ae3ac9789 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -39,7 +39,6 @@
 #include <linux/buffer_head.h>
 #include <linux/task_io_accounting_ops.h>
 #include <linux/bio.h>
-#include <linux/notifier.h>
 #include <linux/cpu.h>
 #include <linux/bitops.h>
 #include <linux/mpage.h>
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 924e37fb1620..fd6f8ed28e01 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -38,7 +38,6 @@
 #include <linux/kmsg_dump.h>
 #include <linux/syslog.h>
 #include <linux/cpu.h>
-#include <linux/notifier.h>
 #include <linux/rculist.h>
 #include <linux/poll.h>
 #include <linux/irq_work.h>
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index c72577e472f2..a66595ba5543 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -4,7 +4,6 @@
  */
 
 #include <linux/percpu_counter.h>
-#include <linux/notifier.h>
 #include <linux/mutex.h>
 #include <linux/init.h>
 #include <linux/cpu.h>
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 6551d3b0dc30..84ae9bf5858a 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -27,7 +27,6 @@
 #include <linux/mpage.h>
 #include <linux/rmap.h>
 #include <linux/percpu.h>
-#include <linux/notifier.h>
 #include <linux/smp.h>
 #include <linux/sysctl.h>
 #include <linux/cpu.h>
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e75865d58ba7..05e983f42316 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -32,7 +32,6 @@
 #include <linux/slab.h>
 #include <linux/ratelimit.h>
 #include <linux/oom.h>
-#include <linux/notifier.h>
 #include <linux/topology.h>
 #include <linux/sysctl.h>
 #include <linux/cpu.h>
diff --git a/mm/slub.c b/mm/slub.c
index ce2b9e5cea77..8da34a8af53d 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -19,7 +19,6 @@
 #include <linux/slab.h>
 #include "slab.h"
 #include <linux/proc_fs.h>
-#include <linux/notifier.h>
 #include <linux/seq_file.h>
 #include <linux/kasan.h>
 #include <linux/cpu.h>
diff --git a/net/core/dev.c b/net/core/dev.c
index 325fc5088370..82114e1111e6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -93,7 +93,6 @@
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/ethtool.h>
-#include <linux/notifier.h>
 #include <linux/skbuff.h>
 #include <linux/bpf.h>
 #include <linux/bpf_trace.h>
-- 
cgit v1.2.3-59-g8ed1b


From 3a7ad0634f0986d807772ba74f66f7c3a73612e5 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 29 Aug 2018 11:50:12 -0700
Subject: Revert "packet: switch kvzalloc to allocate memory"

This reverts commit 71e41286203c017d24f041a7cd71abea7ca7b1e0.

mmap()/munmap() can not be backed by kmalloced pages :

We fault in :

    VM_BUG_ON_PAGE(PageSlab(page), page);

    unmap_single_vma+0x8a/0x110
    unmap_vmas+0x4b/0x90
    unmap_region+0xc9/0x140
    do_munmap+0x274/0x360
    vm_munmap+0x81/0xc0
    SyS_munmap+0x2b/0x40
    do_syscall_64+0x13e/0x1c0
    entry_SYSCALL_64_after_hwframe+0x42/0xb7

Fixes: 71e41286203c ("packet: switch kvzalloc to allocate memory")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: John Sperbeck <jsperbeck@google.com>
Bisected-by: John Sperbeck <jsperbeck@google.com>
Cc: Zhang Yu <zhangyu31@baidu.com>
Cc: Li RongQing <lirongqing@baidu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 44 +++++++++++++++++++++++++++++++-------------
 net/packet/internal.h  |  1 +
 2 files changed, 32 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 5610061e7f2e..75c92a87e7b2 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -4137,36 +4137,52 @@ static const struct vm_operations_struct packet_mmap_ops = {
 	.close	=	packet_mm_close,
 };
 
-static void free_pg_vec(struct pgv *pg_vec, unsigned int len)
+static void free_pg_vec(struct pgv *pg_vec, unsigned int order,
+			unsigned int len)
 {
 	int i;
 
 	for (i = 0; i < len; i++) {
 		if (likely(pg_vec[i].buffer)) {
-			kvfree(pg_vec[i].buffer);
+			if (is_vmalloc_addr(pg_vec[i].buffer))
+				vfree(pg_vec[i].buffer);
+			else
+				free_pages((unsigned long)pg_vec[i].buffer,
+					   order);
 			pg_vec[i].buffer = NULL;
 		}
 	}
 	kfree(pg_vec);
 }
 
-static char *alloc_one_pg_vec_page(unsigned long size)
+static char *alloc_one_pg_vec_page(unsigned long order)
 {
 	char *buffer;
+	gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP |
+			  __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;
 
-	buffer = kvzalloc(size, GFP_KERNEL);
+	buffer = (char *) __get_free_pages(gfp_flags, order);
 	if (buffer)
 		return buffer;
 
-	buffer = kvzalloc(size, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
+	/* __get_free_pages failed, fall back to vmalloc */
+	buffer = vzalloc(array_size((1 << order), PAGE_SIZE));
+	if (buffer)
+		return buffer;
 
-	return buffer;
+	/* vmalloc failed, lets dig into swap here */
+	gfp_flags &= ~__GFP_NORETRY;
+	buffer = (char *) __get_free_pages(gfp_flags, order);
+	if (buffer)
+		return buffer;
+
+	/* complete and utter failure */
+	return NULL;
 }
 
-static struct pgv *alloc_pg_vec(struct tpacket_req *req)
+static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order)
 {
 	unsigned int block_nr = req->tp_block_nr;
-	unsigned long size = req->tp_block_size;
 	struct pgv *pg_vec;
 	int i;
 
@@ -4175,7 +4191,7 @@ static struct pgv *alloc_pg_vec(struct tpacket_req *req)
 		goto out;
 
 	for (i = 0; i < block_nr; i++) {
-		pg_vec[i].buffer = alloc_one_pg_vec_page(size);
+		pg_vec[i].buffer = alloc_one_pg_vec_page(order);
 		if (unlikely(!pg_vec[i].buffer))
 			goto out_free_pgvec;
 	}
@@ -4184,7 +4200,7 @@ out:
 	return pg_vec;
 
 out_free_pgvec:
-	free_pg_vec(pg_vec, block_nr);
+	free_pg_vec(pg_vec, order, block_nr);
 	pg_vec = NULL;
 	goto out;
 }
@@ -4194,9 +4210,9 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
 {
 	struct pgv *pg_vec = NULL;
 	struct packet_sock *po = pkt_sk(sk);
+	int was_running, order = 0;
 	struct packet_ring_buffer *rb;
 	struct sk_buff_head *rb_queue;
-	int was_running;
 	__be16 num;
 	int err = -EINVAL;
 	/* Added to avoid minimal code churn */
@@ -4258,7 +4274,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
 			goto out;
 
 		err = -ENOMEM;
-		pg_vec = alloc_pg_vec(req);
+		order = get_order(req->tp_block_size);
+		pg_vec = alloc_pg_vec(req, order);
 		if (unlikely(!pg_vec))
 			goto out;
 		switch (po->tp_version) {
@@ -4312,6 +4329,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
 		rb->frame_size = req->tp_frame_size;
 		spin_unlock_bh(&rb_queue->lock);
 
+		swap(rb->pg_vec_order, order);
 		swap(rb->pg_vec_len, req->tp_block_nr);
 
 		rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
@@ -4337,7 +4355,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
 	}
 
 	if (pg_vec)
-		free_pg_vec(pg_vec, req->tp_block_nr);
+		free_pg_vec(pg_vec, order, req->tp_block_nr);
 out:
 	return err;
 }
diff --git a/net/packet/internal.h b/net/packet/internal.h
index 8f50036f62f0..3bb7c5fb3bff 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -64,6 +64,7 @@ struct packet_ring_buffer {
 	unsigned int		frame_size;
 	unsigned int		frame_max;
 
+	unsigned int		pg_vec_order;
 	unsigned int		pg_vec_pages;
 	unsigned int		pg_vec_len;
 
-- 
cgit v1.2.3-59-g8ed1b


From b0e0b0abbd5e52568e3848b0ba54a9efa3a11547 Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@ucw.cz>
Date: Thu, 30 Aug 2018 13:30:03 +0200
Subject: net/rds: RDS is not Radio Data System

Getting prompt "The RDS Protocol" (RDS) is not too helpful, and it is
easily confused with Radio Data System (which we may want to support
in kernel, too).

Signed-off-by: Pavel Machek <pavel@ucw.cz>
Acked-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Acked-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/rds/Kconfig b/net/rds/Kconfig
index 01b3bd6a3708..b9092111bc45 100644
--- a/net/rds/Kconfig
+++ b/net/rds/Kconfig
@@ -1,6 +1,6 @@
 
 config RDS
-	tristate "The RDS Protocol"
+	tristate "The Reliable Datagram Sockets Protocol"
 	depends on INET
 	---help---
 	  The RDS (Reliable Datagram Sockets) protocol provides reliable,
-- 
cgit v1.2.3-59-g8ed1b


From 63cc357f7bba6729869565a12df08441a5995d9a Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 30 Aug 2018 14:24:29 +0200
Subject: tcp: do not restart timewait timer on rst reception

RFC 1337 says:
 ''Ignore RST segments in TIME-WAIT state.
   If the 2 minute MSL is enforced, this fix avoids all three hazards.''

So with net.ipv4.tcp_rfc1337=1, expected behaviour is to have TIME-WAIT sk
expire rather than removing it instantly when a reset is received.

However, Linux will also re-start the TIME-WAIT timer.

This causes connect to fail when tying to re-use ports or very long
delays (until syn retry interval exceeds MSL).

packetdrill test case:
// Demonstrate bogus rearming of TIME-WAIT timer in rfc1337 mode.
`sysctl net.ipv4.tcp_rfc1337=1`

0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
0.000 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
0.000 bind(3, ..., ...) = 0
0.000 listen(3, 1) = 0

0.100 < S 0:0(0) win 29200 <mss 1460,nop,nop,sackOK,nop,wscale 7>
0.100 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 7>
0.200 < . 1:1(0) ack 1 win 257
0.200 accept(3, ..., ...) = 4

// Receive first segment
0.310 < P. 1:1001(1000) ack 1 win 46

// Send one ACK
0.310 > . 1:1(0) ack 1001

// read 1000 byte
0.310 read(4, ..., 1000) = 1000

// Application writes 100 bytes
0.350 write(4, ..., 100) = 100
0.350 > P. 1:101(100) ack 1001

// ACK
0.500 < . 1001:1001(0) ack 101 win 257

// close the connection
0.600 close(4) = 0
0.600 > F. 101:101(0) ack 1001 win 244

// Our side is in FIN_WAIT_1 & waits for ack to fin
0.7 < . 1001:1001(0) ack 102 win 244

// Our side is in FIN_WAIT_2 with no outstanding data.
0.8 < F. 1001:1001(0) ack 102 win 244
0.8 > . 102:102(0) ack 1002 win 244

// Our side is now in TIME_WAIT state, send ack for fin.
0.9 < F. 1002:1002(0) ack 102 win 244
0.9 > . 102:102(0) ack 1002 win 244

// Peer reopens with in-window SYN:
1.000 < S 1000:1000(0) win 9200 <mss 1460,nop,nop,sackOK,nop,wscale 7>

// Therefore, reply with ACK.
1.000 > . 102:102(0) ack 1002 win 244

// Peer sends RST for this ACK.  Normally this RST results
// in tw socket removal, but rfc1337=1 setting prevents this.
1.100 < R 1002:1002(0) win 244

// second syn. Due to rfc1337=1 expect another pure ACK.
31.0 < S 1000:1000(0) win 9200 <mss 1460,nop,nop,sackOK,nop,wscale 7>
31.0 > . 102:102(0) ack 1002 win 244

// .. and another RST from peer.
31.1 < R 1002:1002(0) win 244
31.2 `echo no timer restart;ss -m -e -a -i -n -t -o state TIME-WAIT`

// third syn after one minute.  Time-Wait socket should have expired by now.
63.0 < S 1000:1000(0) win 9200 <mss 1460,nop,nop,sackOK,nop,wscale 7>

// so we expect a syn-ack & 3whs to proceed from here on.
63.0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 7>

Without this patch, 'ss' shows restarts of tw timer and last packet is
thus just another pure ack, more than one minute later.

This restores the original code from commit 283fd6cf0be690a83
("Merge in ANK networking jumbo patch") in netdev-vger-cvs.git .

For some reason the else branch was removed/lost in 1f28b683339f7
("Merge in TCP/UDP optimizations and [..]") and timer restart became
unconditional.

Reported-by: Michal Tesar <mtesar@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_minisocks.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 75ef332a7caf..12affb7864d9 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -184,8 +184,9 @@ kill:
 				inet_twsk_deschedule_put(tw);
 				return TCP_TW_SUCCESS;
 			}
+		} else {
+			inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN);
 		}
-		inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN);
 
 		if (tmp_opt.saw_tstamp) {
 			tcptw->tw_ts_recent	  = tmp_opt.rcv_tsval;
-- 
cgit v1.2.3-59-g8ed1b


From 93bbadd6e0a2a58e49d265b9b1aa58e621b60a26 Mon Sep 17 00:00:00 2001
From: Alexey Kodanev <alexey.kodanev@oracle.com>
Date: Thu, 30 Aug 2018 19:11:24 +0300
Subject: ipv6: don't get lwtstate twice in ip6_rt_copy_init()

Commit 80f1a0f4e0cd ("net/ipv6: Put lwtstate when destroying fib6_info")
partially fixed the kmemleak [1], lwtstate can be copied from fib6_info,
with ip6_rt_copy_init(), and it should be done only once there.

rt->dst.lwtstate is set by ip6_rt_init_dst(), at the start of the function
ip6_rt_copy_init(), so there is no need to get it again at the end.

With this patch, lwtstate also isn't copied from RTF_REJECT routes.

[1]:
unreferenced object 0xffff880b6aaa14e0 (size 64):
  comm "ip", pid 10577, jiffies 4295149341 (age 1273.903s)
  hex dump (first 32 bytes):
    01 00 04 00 04 00 00 00 10 00 00 00 00 00 00 00  ................
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<0000000018664623>] lwtunnel_build_state+0x1bc/0x420
    [<00000000b73aa29a>] ip6_route_info_create+0x9f7/0x1fd0
    [<00000000ee2c5d1f>] ip6_route_add+0x14/0x70
    [<000000008537b55c>] inet6_rtm_newroute+0xd9/0xe0
    [<000000002acc50f5>] rtnetlink_rcv_msg+0x66f/0x8e0
    [<000000008d9cd381>] netlink_rcv_skb+0x268/0x3b0
    [<000000004c893c76>] netlink_unicast+0x417/0x5a0
    [<00000000f2ab1afb>] netlink_sendmsg+0x70b/0xc30
    [<00000000890ff0aa>] sock_sendmsg+0xb1/0xf0
    [<00000000a2e7b66f>] ___sys_sendmsg+0x659/0x950
    [<000000001e7426c8>] __sys_sendmsg+0xde/0x170
    [<00000000fe411443>] do_syscall_64+0x9f/0x4a0
    [<000000001be7b28b>] entry_SYSCALL_64_after_hwframe+0x49/0xbe
    [<000000006d21f353>] 0xffffffffffffffff

Fixes: 6edb3c96a5f0 ("net/ipv6: Defer initialization of dst to data path")
Signed-off-by: Alexey Kodanev <alexey.kodanev@oracle.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index c4ea13e8360b..18e00ce1719a 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -996,7 +996,6 @@ static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
 	rt->rt6i_src = ort->fib6_src;
 #endif
 	rt->rt6i_prefsrc = ort->fib6_prefsrc;
-	rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
 }
 
 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
-- 
cgit v1.2.3-59-g8ed1b


From 9db39f4d4f94b61e4b64b077f6ddb2bdfb533a88 Mon Sep 17 00:00:00 2001
From: Tushar Dave <tushar.n.dave@oracle.com>
Date: Fri, 31 Aug 2018 23:45:16 +0200
Subject: bpf: Fix bpf_msg_pull_data()

Helper bpf_msg_pull_data() mistakenly reuses variable 'offset' while
linearizing multiple scatterlist elements. Variable 'offset' is used
to find first starting scatterlist element
    i.e. msg->data = sg_virt(&sg[first_sg]) + start - offset"

Use different variable name while linearizing multiple scatterlist
elements so that value contained in variable 'offset' won't get
overwritten.

Fixes: 015632bb30da ("bpf: sk_msg program helper bpf_sk_msg_pull_data")
Signed-off-by: Tushar Dave <tushar.n.dave@oracle.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 net/core/filter.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/core/filter.c b/net/core/filter.c
index 2c7801f6737a..aecdeba052d3 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2292,7 +2292,7 @@ static const struct bpf_func_proto bpf_msg_cork_bytes_proto = {
 BPF_CALL_4(bpf_msg_pull_data,
 	   struct sk_msg_buff *, msg, u32, start, u32, end, u64, flags)
 {
-	unsigned int len = 0, offset = 0, copy = 0;
+	unsigned int len = 0, offset = 0, copy = 0, poffset = 0;
 	int bytes = end - start, bytes_sg_total;
 	struct scatterlist *sg = msg->sg_data;
 	int first_sg, last_sg, i, shift;
@@ -2348,16 +2348,15 @@ BPF_CALL_4(bpf_msg_pull_data,
 	if (unlikely(!page))
 		return -ENOMEM;
 	p = page_address(page);
-	offset = 0;
 
 	i = first_sg;
 	do {
 		from = sg_virt(&sg[i]);
 		len = sg[i].length;
-		to = p + offset;
+		to = p + poffset;
 
 		memcpy(to, from, len);
-		offset += len;
+		poffset += len;
 		sg[i].length = 0;
 		put_page(sg_page(&sg[i]));
 
-- 
cgit v1.2.3-59-g8ed1b


From 4fb7253e4f9a8f06a986a3b317e2f79d9b43d552 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Wed, 29 Aug 2018 18:06:08 +0800
Subject: igmp: fix incorrect unsolicit report count when join group

We should not start timer if im->unsolicit_count equal to 0 after decrease.
Or we will send one more unsolicit report message. i.e. 3 instead of 2 by
default.

Fixes: 1da177e4c3f41 ("Linux-2.6.12-rc2")
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/igmp.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index cf75f8944b05..deb1f8274d71 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -820,10 +820,9 @@ static void igmp_timer_expire(struct timer_list *t)
 	spin_lock(&im->lock);
 	im->tm_running = 0;
 
-	if (im->unsolicit_count) {
-		im->unsolicit_count--;
+	if (im->unsolicit_count && --im->unsolicit_count)
 		igmp_start_timer(im, unsolicited_report_interval(in_dev));
-	}
+
 	im->reporter = 1;
 	spin_unlock(&im->lock);
 
-- 
cgit v1.2.3-59-g8ed1b


From ff06525fcb8ae3c302ac1319bf6c07c026dea964 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Wed, 29 Aug 2018 18:06:10 +0800
Subject: igmp: fix incorrect unsolicit report count after link down and up

After link down and up, i.e. when call ip_mc_up(), we doesn't init
im->unsolicit_count. So after igmp_timer_expire(), we will not start
timer again and only send one unsolicit report at last.

Fix it by initializing im->unsolicit_count in igmp_group_added(), so
we can respect igmp robustness value.

Fixes: 24803f38a5c0b ("igmp: do not remove igmp souce list info when set link down")
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/igmp.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index deb1f8274d71..4da39446da2d 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1307,6 +1307,8 @@ static void igmp_group_added(struct ip_mc_list *im)
 
 	if (in_dev->dead)
 		return;
+
+	im->unsolicit_count = net->ipv4.sysctl_igmp_qrv;
 	if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) {
 		spin_lock_bh(&im->lock);
 		igmp_start_timer(im, IGMP_INITIAL_REPORT_DELAY);
@@ -1390,9 +1392,6 @@ static void __ip_mc_inc_group(struct in_device *in_dev, __be32 addr,
 			      unsigned int mode)
 {
 	struct ip_mc_list *im;
-#ifdef CONFIG_IP_MULTICAST
-	struct net *net = dev_net(in_dev->dev);
-#endif
 
 	ASSERT_RTNL();
 
@@ -1419,7 +1418,6 @@ static void __ip_mc_inc_group(struct in_device *in_dev, __be32 addr,
 	spin_lock_init(&im->lock);
 #ifdef CONFIG_IP_MULTICAST
 	timer_setup(&im->timer, igmp_timer_expire, 0);
-	im->unsolicit_count = net->ipv4.sysctl_igmp_qrv;
 #endif
 
 	im->next_rcu = in_dev->mc_list;
-- 
cgit v1.2.3-59-g8ed1b


From 15a81b418e22a9aa4a0504471fdcb0f4ebf69b96 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Thu, 30 Aug 2018 14:15:43 -0700
Subject: net/ipv6: Only update MTU metric if it set

Jan reported a regression after an update to 4.18.5. In this case ipv6
default route is setup by systemd-networkd based on data from an RA. The
RA contains an MTU of 1492 which is used when the route is first inserted
but then systemd-networkd pushes down updates to the default route
without the mtu set.

Prior to the change to fib6_info, metrics such as MTU were held in the
dst_entry and rt6i_pmtu in rt6_info contained an update to the mtu if
any. ip6_mtu would look at rt6i_pmtu first and use it if set. If not,
the value from the metrics is used if it is set and finally falling
back to the idev value.

After the fib6_info change metrics are contained in the fib6_info struct
and there is no equivalent to rt6i_pmtu. To maintain consistency with
the old behavior the new code should only reset the MTU in the metrics
if the route update has it set.

Fixes: d4ead6b34b67 ("net/ipv6: move metrics from dst to rt6_info")
Reported-by: Jan Janssen <medhefgo@web.de>
Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_fib.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index c861a6d4671d..5516f55e214b 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -989,7 +989,10 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
 					fib6_clean_expires(iter);
 				else
 					fib6_set_expires(iter, rt->expires);
-				fib6_metric_set(iter, RTAX_MTU, rt->fib6_pmtu);
+
+				if (rt->fib6_pmtu)
+					fib6_metric_set(iter, RTAX_MTU,
+							rt->fib6_pmtu);
 				return -EEXIST;
 			}
 			/* If we have the same destination and the same metric,
-- 
cgit v1.2.3-59-g8ed1b


From 66eb02d839e8495ae6b612e2d09ff599374b80e2 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Date: Fri, 31 Aug 2018 01:04:13 +0200
Subject: mac80211: fix an off-by-one issue in A-MSDU max_subframe computation

Initialize 'n' to 2 in order to take into account also the first
packet in the estimation of max_subframe limit for a given A-MSDU
since frag_tail pointer is NULL when ieee80211_amsdu_aggregate
routine analyzes the second frame.

Fixes: 6e0456b54545 ("mac80211: add A-MSDU tx support")
Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/tx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 6ca0865de945..9b3b069e418a 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -3174,7 +3174,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
 	void *data;
 	bool ret = false;
 	unsigned int orig_len;
-	int n = 1, nfrags, pad = 0;
+	int n = 2, nfrags, pad = 0;
 	u16 hdrlen;
 
 	if (!ieee80211_hw_check(&local->hw, TX_AMSDU))
-- 
cgit v1.2.3-59-g8ed1b


From 8442938c3a2177ba16043b3a935f2c78266ad399 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 31 Aug 2018 11:10:55 +0300
Subject: cfg80211: fix a type issue in ieee80211_chandef_to_operating_class()

The "chandef->center_freq1" variable is a u32 but "freq" is a u16 so we
are truncating away the high bits.  I noticed this bug because in commit
9cf0a0b4b64a ("cfg80211: Add support for 60GHz band channels 5 and 6")
we made "freq <= 56160 + 2160 * 6" a valid requency when before it was
only "freq <= 56160 + 2160 * 4" that was valid.  It introduces a static
checker warning:

    net/wireless/util.c:1571 ieee80211_chandef_to_operating_class()
    warn: always true condition '(freq <= 56160 + 2160 * 6) => (0-u16max <= 69120)'

But really we probably shouldn't have been truncating the high bits
away to begin with.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/util.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/wireless/util.c b/net/wireless/util.c
index 3c654cd7ba56..908bf5b6d89e 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -1374,7 +1374,7 @@ bool ieee80211_chandef_to_operating_class(struct cfg80211_chan_def *chandef,
 					  u8 *op_class)
 {
 	u8 vht_opclass;
-	u16 freq = chandef->center_freq1;
+	u32 freq = chandef->center_freq1;
 
 	if (freq >= 2412 && freq <= 2472) {
 		if (chandef->width > NL80211_CHAN_WIDTH_40)
-- 
cgit v1.2.3-59-g8ed1b


From abd76d255d69d70206c01b9cb19ba36a9c1df6a1 Mon Sep 17 00:00:00 2001
From: "Dreyfuss, Haim" <haim.dreyfuss@intel.com>
Date: Fri, 31 Aug 2018 11:31:04 +0300
Subject: mac80211: fix WMM TXOP calculation

In commit 9236c4523e5b ("mac80211: limit wmm params to comply
with ETSI requirements"), we have limited the WMM parameters to
comply with 802.11 and ETSI standard.  Mistakenly the TXOP value
was caluclated wrong.  Fix it by taking the minimum between
802.11 to ETSI to make sure we are not violating both.

Fixes: e552af058148 ("mac80211: limit wmm params to comply with ETSI requirements")
Signed-off-by: Haim Dreyfuss <haim.dreyfuss@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/util.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index c80187d6e6bb..93b5bb849ad7 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1151,8 +1151,7 @@ void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata,
 	qparam->cw_min = max_t(u16, qparam->cw_min, wmm_ac->cw_min);
 	qparam->cw_max = max_t(u16, qparam->cw_max, wmm_ac->cw_max);
 	qparam->aifs = max_t(u8, qparam->aifs, wmm_ac->aifsn);
-	qparam->txop = !qparam->txop ? wmm_ac->cot / 32 :
-		min_t(u16, qparam->txop, wmm_ac->cot / 32);
+	qparam->txop = min_t(u16, qparam->txop, wmm_ac->cot / 32);
 	rcu_read_unlock();
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From f3ffb6c3a28963657eb8b02a795d75f2ebbd5ef4 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Fri, 31 Aug 2018 11:31:06 +0300
Subject: mac80211: fix a race between restart and CSA flows

We hit a problem with iwlwifi that was caused by a bug in
mac80211. A bug in iwlwifi caused the firwmare to crash in
certain cases in channel switch. Because of that bug,
drv_pre_channel_switch would fail and trigger the restart
flow.
Now we had the hw restart worker which runs on the system's
workqueue and the csa_connection_drop_work worker that runs
on mac80211's workqueue that can run together. This is
obviously problematic since the restart work wants to
reconfigure the connection, while the csa_connection_drop_work
worker does the exact opposite: it tries to disconnect.

Fix this by cancelling the csa_connection_drop_work worker
in the restart worker.

Note that this can sound racy: we could have:

driver   iface_work   CSA_work   restart_work
+++++++++++++++++++++++++++++++++++++++++++++
              |
 <--drv_cs ---|
<FW CRASH!>
-CS FAILED-->
              |                       |
              |                 cancel_work(CSA)
           schedule                   |
           CSA work                   |
                         |            |
                        Race between those 2

But this is not possible because we flush the workqueue
in the restart worker before we cancel the CSA worker.
That would be bullet proof if we could guarantee that
we schedule the CSA worker only from the iface_work
which runs on the workqueue (and not on the system's
workqueue), but unfortunately we do have an instance
in which we schedule the CSA work outside the context
of the workqueue (ieee80211_chswitch_done).

Note also that we should probably cancel other workers
like beacon_connection_loss_work and possibly others
for different types of interfaces, at the very least,
IBSS should suffer from the exact same problem, but for
now, do the minimum to fix the actual bug that was actually
experienced and reproduced.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/main.c | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 27cd64acaf00..66cbddd65b47 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -255,8 +255,27 @@ static void ieee80211_restart_work(struct work_struct *work)
 
 	flush_work(&local->radar_detected_work);
 	rtnl_lock();
-	list_for_each_entry(sdata, &local->interfaces, list)
+	list_for_each_entry(sdata, &local->interfaces, list) {
+		/*
+		 * XXX: there may be more work for other vif types and even
+		 * for station mode: a good thing would be to run most of
+		 * the iface type's dependent _stop (ieee80211_mg_stop,
+		 * ieee80211_ibss_stop) etc...
+		 * For now, fix only the specific bug that was seen: race
+		 * between csa_connection_drop_work and us.
+		 */
+		if (sdata->vif.type == NL80211_IFTYPE_STATION) {
+			/*
+			 * This worker is scheduled from the iface worker that
+			 * runs on mac80211's workqueue, so we can't be
+			 * scheduling this worker after the cancel right here.
+			 * The exception is ieee80211_chswitch_done.
+			 * Then we can have a race...
+			 */
+			cancel_work_sync(&sdata->u.mgd.csa_connection_drop_work);
+		}
 		flush_delayed_work(&sdata->dec_tailroom_needed_wk);
+	}
 	ieee80211_scan_cancel(local);
 
 	/* make sure any new ROC will consider local->in_reconfig */
-- 
cgit v1.2.3-59-g8ed1b


From 0007e94355fdb71a1cf5dba0754155cba08f0666 Mon Sep 17 00:00:00 2001
From: Ilan Peer <ilan.peer@intel.com>
Date: Fri, 31 Aug 2018 11:31:10 +0300
Subject: mac80211: Fix station bandwidth setting after channel switch

When performing a channel switch flow for a managed interface, the
flow did not update the bandwidth of the AP station and the rate
scale algorithm. In case of a channel width downgrade, this would
result with the rate scale algorithm using a bandwidth that does not
match the interface channel configuration.

Fix this by updating the AP station bandwidth and rate scaling algorithm
before the actual channel change in case of a bandwidth downgrade, or
after the actual channel change in case of a bandwidth upgrade.

Signed-off-by: Ilan Peer <ilan.peer@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/mlme.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index a59187c016e0..22b699460176 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -978,6 +978,10 @@ static void ieee80211_chswitch_work(struct work_struct *work)
 	 */
 
 	if (sdata->reserved_chanctx) {
+		struct ieee80211_supported_band *sband = NULL;
+		struct sta_info *mgd_sta = NULL;
+		enum ieee80211_sta_rx_bandwidth bw = IEEE80211_STA_RX_BW_20;
+
 		/*
 		 * with multi-vif csa driver may call ieee80211_csa_finish()
 		 * many times while waiting for other interfaces to use their
@@ -986,6 +990,48 @@ static void ieee80211_chswitch_work(struct work_struct *work)
 		if (sdata->reserved_ready)
 			goto out;
 
+		if (sdata->vif.bss_conf.chandef.width !=
+		    sdata->csa_chandef.width) {
+			/*
+			 * For managed interface, we need to also update the AP
+			 * station bandwidth and align the rate scale algorithm
+			 * on the bandwidth change. Here we only consider the
+			 * bandwidth of the new channel definition (as channel
+			 * switch flow does not have the full HT/VHT/HE
+			 * information), assuming that if additional changes are
+			 * required they would be done as part of the processing
+			 * of the next beacon from the AP.
+			 */
+			switch (sdata->csa_chandef.width) {
+			case NL80211_CHAN_WIDTH_20_NOHT:
+			case NL80211_CHAN_WIDTH_20:
+			default:
+				bw = IEEE80211_STA_RX_BW_20;
+				break;
+			case NL80211_CHAN_WIDTH_40:
+				bw = IEEE80211_STA_RX_BW_40;
+				break;
+			case NL80211_CHAN_WIDTH_80:
+				bw = IEEE80211_STA_RX_BW_80;
+				break;
+			case NL80211_CHAN_WIDTH_80P80:
+			case NL80211_CHAN_WIDTH_160:
+				bw = IEEE80211_STA_RX_BW_160;
+				break;
+			}
+
+			mgd_sta = sta_info_get(sdata, ifmgd->bssid);
+			sband =
+				local->hw.wiphy->bands[sdata->csa_chandef.chan->band];
+		}
+
+		if (sdata->vif.bss_conf.chandef.width >
+		    sdata->csa_chandef.width) {
+			mgd_sta->sta.bandwidth = bw;
+			rate_control_rate_update(local, sband, mgd_sta,
+						 IEEE80211_RC_BW_CHANGED);
+		}
+
 		ret = ieee80211_vif_use_reserved_context(sdata);
 		if (ret) {
 			sdata_info(sdata,
@@ -996,6 +1042,13 @@ static void ieee80211_chswitch_work(struct work_struct *work)
 			goto out;
 		}
 
+		if (sdata->vif.bss_conf.chandef.width <
+		    sdata->csa_chandef.width) {
+			mgd_sta->sta.bandwidth = bw;
+			rate_control_rate_update(local, sband, mgd_sta,
+						 IEEE80211_RC_BW_CHANGED);
+		}
+
 		goto out;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 6c18b27d6e5c6a7206364eae2b47bc8d8b2fa68f Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Fri, 31 Aug 2018 11:31:12 +0300
Subject: mac80211: don't Tx a deauth frame if the AP forbade Tx

If the driver fails to properly prepare for the channel
switch, mac80211 will disconnect. If the CSA IE had mode
set to 1, it means that the clients are not allowed to send
any Tx on the current channel, and that includes the
deauthentication frame.

Make sure that we don't send the deauthentication frame in
this case.

In iwlwifi, this caused a failure to flush queues since the
firmware already closed the queues after having parsed the
CSA IE. Then mac80211 would wait until the deauthentication
frame would go out (drv_flush(drop=false)) and that would
never happen.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/mlme.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 22b699460176..b046bf95eb3c 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1270,6 +1270,16 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
 					 cbss->beacon_interval));
 	return;
  drop_connection:
+	/*
+	 * This is just so that the disconnect flow will know that
+	 * we were trying to switch channel and failed. In case the
+	 * mode is 1 (we are not allowed to Tx), we will know not to
+	 * send a deauthentication frame. Those two fields will be
+	 * reset when the disconnection worker runs.
+	 */
+	sdata->vif.csa_active = true;
+	sdata->csa_block_tx = csa_ie.mode;
+
 	ieee80211_queue_work(&local->hw, &ifmgd->csa_connection_drop_work);
 	mutex_unlock(&local->chanctx_mtx);
 	mutex_unlock(&local->mtx);
@@ -2453,6 +2463,7 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
 	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 	u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN];
+	bool tx;
 
 	sdata_lock(sdata);
 	if (!ifmgd->associated) {
@@ -2460,6 +2471,8 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
 		return;
 	}
 
+	tx = !sdata->csa_block_tx;
+
 	/* AP is probably out of range (or not reachable for another reason) so
 	 * remove the bss struct for that AP.
 	 */
@@ -2467,7 +2480,7 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
 
 	ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH,
 			       WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY,
-			       true, frame_buf);
+			       tx, frame_buf);
 	mutex_lock(&local->mtx);
 	sdata->vif.csa_active = false;
 	ifmgd->csa_waiting_bcn = false;
@@ -2478,7 +2491,7 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
 	}
 	mutex_unlock(&local->mtx);
 
-	ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), true,
+	ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), tx,
 				    WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY);
 
 	sdata_unlock(sdata);
-- 
cgit v1.2.3-59-g8ed1b


From c6e57b3896fc76299913b8cfd82d853bee8a2c84 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Fri, 31 Aug 2018 11:31:13 +0300
Subject: mac80211: shorten the IBSS debug messages

When tracing is enabled, all the debug messages are recorded and must
not exceed MAX_MSG_LEN (100) columns. Longer debug messages grant the
user with:

WARNING: CPU: 3 PID: 32642 at /tmp/wifi-core-20180806094828/src/iwlwifi-stack-dev/net/mac80211/./trace_msg.h:32 trace_event_raw_event_mac80211_msg_event+0xab/0xc0 [mac80211]
Workqueue: phy1 ieee80211_iface_work [mac80211]
 RIP: 0010:trace_event_raw_event_mac80211_msg_event+0xab/0xc0 [mac80211]
 Call Trace:
  __sdata_dbg+0xbd/0x120 [mac80211]
  ieee80211_ibss_rx_queued_mgmt+0x15f/0x510 [mac80211]
  ieee80211_iface_work+0x21d/0x320 [mac80211]

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/ibss.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 6449a1c2283b..f0f5fedb8caa 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -947,8 +947,8 @@ static void ieee80211_rx_mgmt_deauth_ibss(struct ieee80211_sub_if_data *sdata,
 	if (len < IEEE80211_DEAUTH_FRAME_LEN)
 		return;
 
-	ibss_dbg(sdata, "RX DeAuth SA=%pM DA=%pM BSSID=%pM (reason: %d)\n",
-		 mgmt->sa, mgmt->da, mgmt->bssid, reason);
+	ibss_dbg(sdata, "RX DeAuth SA=%pM DA=%pM\n", mgmt->sa, mgmt->da);
+	ibss_dbg(sdata, "\tBSSID=%pM (reason: %d)\n", mgmt->bssid, reason);
 	sta_info_destroy_addr(sdata, mgmt->sa);
 }
 
@@ -966,9 +966,9 @@ static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata,
 	auth_alg = le16_to_cpu(mgmt->u.auth.auth_alg);
 	auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction);
 
-	ibss_dbg(sdata,
-		 "RX Auth SA=%pM DA=%pM BSSID=%pM (auth_transaction=%d)\n",
-		 mgmt->sa, mgmt->da, mgmt->bssid, auth_transaction);
+	ibss_dbg(sdata, "RX Auth SA=%pM DA=%pM\n", mgmt->sa, mgmt->da);
+	ibss_dbg(sdata, "\tBSSID=%pM (auth_transaction=%d)\n",
+		 mgmt->bssid, auth_transaction);
 
 	if (auth_alg != WLAN_AUTH_OPEN || auth_transaction != 1)
 		return;
@@ -1175,10 +1175,10 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 		rx_timestamp = drv_get_tsf(local, sdata);
 	}
 
-	ibss_dbg(sdata,
-		 "RX beacon SA=%pM BSSID=%pM TSF=0x%llx BCN=0x%llx diff=%lld @%lu\n",
+	ibss_dbg(sdata, "RX beacon SA=%pM BSSID=%pM TSF=0x%llx\n",
 		 mgmt->sa, mgmt->bssid,
-		 (unsigned long long)rx_timestamp,
+		 (unsigned long long)rx_timestamp);
+	ibss_dbg(sdata, "\tBCN=0x%llx diff=%lld @%lu\n",
 		 (unsigned long long)beacon_timestamp,
 		 (unsigned long long)(rx_timestamp - beacon_timestamp),
 		 jiffies);
@@ -1537,9 +1537,9 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata,
 
 	tx_last_beacon = drv_tx_last_beacon(local);
 
-	ibss_dbg(sdata,
-		 "RX ProbeReq SA=%pM DA=%pM BSSID=%pM (tx_last_beacon=%d)\n",
-		 mgmt->sa, mgmt->da, mgmt->bssid, tx_last_beacon);
+	ibss_dbg(sdata, "RX ProbeReq SA=%pM DA=%pM\n", mgmt->sa, mgmt->da);
+	ibss_dbg(sdata, "\tBSSID=%pM (tx_last_beacon=%d)\n",
+		 mgmt->bssid, tx_last_beacon);
 
 	if (!tx_last_beacon && is_multicast_ether_addr(mgmt->da))
 		return;
-- 
cgit v1.2.3-59-g8ed1b


From 36feaac35405e932ad9c321d7a2db8a7e4a4d7ca Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Fri, 31 Aug 2018 16:52:01 +0800
Subject: ip6_tunnel: respect ttl inherit for ip6tnl

man ip-tunnel ttl section says:
0 is a special value meaning that packets inherit the TTL value.

IPv4 tunnel respect this in ip_tunnel_xmit(), but IPv6 tunnel has not
implement it yet. To make IPv6 behave consistently with IP tunnel,
add ipv6 tunnel inherit support.

Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_tunnel.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 5df2a58d945c..419960b0ba16 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1188,7 +1188,15 @@ route_lookup:
 		init_tel_txopt(&opt, encap_limit);
 		ipv6_push_frag_opts(skb, &opt.ops, &proto);
 	}
-	hop_limit = hop_limit ? : ip6_dst_hoplimit(dst);
+
+	if (hop_limit == 0) {
+		if (skb->protocol == htons(ETH_P_IP))
+			hop_limit = ip_hdr(skb)->ttl;
+		else if (skb->protocol == htons(ETH_P_IPV6))
+			hop_limit = ipv6_hdr(skb)->hop_limit;
+		else
+			hop_limit = ip6_dst_hoplimit(dst);
+	}
 
 	/* Calculate max headroom for all the headers and adjust
 	 * needed_headroom if necessary.
-- 
cgit v1.2.3-59-g8ed1b


From c10bbfae3ae43fae1d77e16f05a73474acf514ff Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Mon, 3 Sep 2018 10:04:55 +0300
Subject: net: sched: null actions array pointer before releasing action

Currently, tcf_action_delete() nulls actions array pointer after putting
and deleting it. However, if tcf_idr_delete_index() returns an error,
pointer to action is not set to null. That results it being released second
time in error handling code of tca_action_gd().

Kasan error:

[  807.367755] ==================================================================
[  807.375844] BUG: KASAN: use-after-free in tc_setup_cb_call+0x14e/0x250
[  807.382763] Read of size 8 at addr ffff88033e636000 by task tc/2732

[  807.391289] CPU: 0 PID: 2732 Comm: tc Tainted: G        W         4.19.0-rc1+ #799
[  807.399542] Hardware name: Supermicro SYS-2028TP-DECR/X10DRT-P, BIOS 2.0b 03/30/2017
[  807.407948] Call Trace:
[  807.410763]  dump_stack+0x92/0xeb
[  807.414456]  print_address_description+0x70/0x360
[  807.419549]  kasan_report+0x14d/0x300
[  807.423582]  ? tc_setup_cb_call+0x14e/0x250
[  807.428150]  tc_setup_cb_call+0x14e/0x250
[  807.432539]  ? nla_put+0x65/0xe0
[  807.436146]  fl_dump+0x394/0x3f0 [cls_flower]
[  807.440890]  ? fl_tmplt_dump+0x140/0x140 [cls_flower]
[  807.446327]  ? lock_downgrade+0x320/0x320
[  807.450702]  ? lock_acquire+0xe2/0x220
[  807.454819]  ? is_bpf_text_address+0x5/0x140
[  807.459475]  ? memcpy+0x34/0x50
[  807.462980]  ? nla_put+0x65/0xe0
[  807.466582]  tcf_fill_node+0x341/0x430
[  807.470717]  ? tcf_block_put+0xe0/0xe0
[  807.474859]  tcf_node_dump+0xdb/0xf0
[  807.478821]  fl_walk+0x8e/0x170 [cls_flower]
[  807.483474]  tcf_chain_dump+0x35a/0x4d0
[  807.487703]  ? tfilter_notify+0x170/0x170
[  807.492091]  ? tcf_fill_node+0x430/0x430
[  807.496411]  tc_dump_tfilter+0x362/0x3f0
[  807.500712]  ? tc_del_tfilter+0x850/0x850
[  807.505104]  ? kasan_unpoison_shadow+0x30/0x40
[  807.509940]  ? __mutex_unlock_slowpath+0xcf/0x410
[  807.515031]  netlink_dump+0x263/0x4f0
[  807.519077]  __netlink_dump_start+0x2a0/0x300
[  807.523817]  ? tc_del_tfilter+0x850/0x850
[  807.528198]  rtnetlink_rcv_msg+0x46a/0x6d0
[  807.532671]  ? rtnl_fdb_del+0x3f0/0x3f0
[  807.536878]  ? tc_del_tfilter+0x850/0x850
[  807.541280]  netlink_rcv_skb+0x18d/0x200
[  807.545570]  ? rtnl_fdb_del+0x3f0/0x3f0
[  807.549773]  ? netlink_ack+0x500/0x500
[  807.553913]  netlink_unicast+0x2d0/0x370
[  807.558212]  ? netlink_attachskb+0x340/0x340
[  807.562855]  ? _copy_from_iter_full+0xe9/0x3e0
[  807.567677]  ? import_iovec+0x11e/0x1c0
[  807.571890]  netlink_sendmsg+0x3b9/0x6a0
[  807.576192]  ? netlink_unicast+0x370/0x370
[  807.580684]  ? netlink_unicast+0x370/0x370
[  807.585154]  sock_sendmsg+0x6b/0x80
[  807.589015]  ___sys_sendmsg+0x4a1/0x520
[  807.593230]  ? copy_msghdr_from_user+0x210/0x210
[  807.598232]  ? do_wp_page+0x174/0x880
[  807.602276]  ? __handle_mm_fault+0x749/0x1c10
[  807.607021]  ? __handle_mm_fault+0x1046/0x1c10
[  807.611849]  ? __pmd_alloc+0x320/0x320
[  807.615973]  ? check_chain_key+0x140/0x1f0
[  807.620450]  ? check_chain_key+0x140/0x1f0
[  807.624929]  ? __fget_light+0xbc/0xd0
[  807.628970]  ? __sys_sendmsg+0xd7/0x150
[  807.633172]  __sys_sendmsg+0xd7/0x150
[  807.637201]  ? __ia32_sys_shutdown+0x30/0x30
[  807.641846]  ? up_read+0x53/0x90
[  807.645442]  ? __do_page_fault+0x484/0x780
[  807.649949]  ? do_syscall_64+0x1e/0x2c0
[  807.654164]  do_syscall_64+0x72/0x2c0
[  807.658198]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
[  807.663625] RIP: 0033:0x7f42e9870150
[  807.667568] Code: 8b 15 3c 7d 2b 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb cd 66 0f 1f 44 00 00 83 3d b9 d5 2b 00 00 75 10 b8 2e 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 be cd 00 00 48 89 04 24
[  807.687328] RSP: 002b:00007ffdbf595b58 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
[  807.695564] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f42e9870150
[  807.703083] RDX: 0000000000000000 RSI: 00007ffdbf595b80 RDI: 0000000000000003
[  807.710605] RBP: 00007ffdbf599d90 R08: 0000000000679bc0 R09: 000000000000000f
[  807.718127] R10: 00000000000005e7 R11: 0000000000000246 R12: 00007ffdbf599d88
[  807.725651] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000

[  807.735048] Allocated by task 2687:
[  807.738902]  kasan_kmalloc+0xa0/0xd0
[  807.742852]  __kmalloc+0x118/0x2d0
[  807.746615]  tcf_idr_create+0x44/0x320
[  807.750738]  tcf_nat_init+0x41e/0x530 [act_nat]
[  807.755638]  tcf_action_init_1+0x4e0/0x650
[  807.760104]  tcf_action_init+0x1ce/0x2d0
[  807.764395]  tcf_exts_validate+0x1d8/0x200
[  807.768861]  fl_change+0x55a/0x26b4 [cls_flower]
[  807.773845]  tc_new_tfilter+0x748/0xa20
[  807.778051]  rtnetlink_rcv_msg+0x56a/0x6d0
[  807.782517]  netlink_rcv_skb+0x18d/0x200
[  807.786804]  netlink_unicast+0x2d0/0x370
[  807.791095]  netlink_sendmsg+0x3b9/0x6a0
[  807.795387]  sock_sendmsg+0x6b/0x80
[  807.799240]  ___sys_sendmsg+0x4a1/0x520
[  807.803445]  __sys_sendmsg+0xd7/0x150
[  807.807473]  do_syscall_64+0x72/0x2c0
[  807.811506]  entry_SYSCALL_64_after_hwframe+0x49/0xbe

[  807.818776] Freed by task 2728:
[  807.822283]  __kasan_slab_free+0x122/0x180
[  807.826752]  kfree+0xf4/0x2f0
[  807.830080]  __tcf_action_put+0x5a/0xb0
[  807.834281]  tcf_action_put_many+0x46/0x70
[  807.838747]  tca_action_gd+0x232/0xc40
[  807.842862]  tc_ctl_action+0x215/0x230
[  807.846977]  rtnetlink_rcv_msg+0x56a/0x6d0
[  807.851444]  netlink_rcv_skb+0x18d/0x200
[  807.855731]  netlink_unicast+0x2d0/0x370
[  807.860021]  netlink_sendmsg+0x3b9/0x6a0
[  807.864312]  sock_sendmsg+0x6b/0x80
[  807.868166]  ___sys_sendmsg+0x4a1/0x520
[  807.872372]  __sys_sendmsg+0xd7/0x150
[  807.876401]  do_syscall_64+0x72/0x2c0
[  807.880431]  entry_SYSCALL_64_after_hwframe+0x49/0xbe

[  807.887704] The buggy address belongs to the object at ffff88033e636000
                which belongs to the cache kmalloc-256 of size 256
[  807.900909] The buggy address is located 0 bytes inside of
                256-byte region [ffff88033e636000, ffff88033e636100)
[  807.913155] The buggy address belongs to the page:
[  807.918322] page:ffffea000cf98d80 count:1 mapcount:0 mapping:ffff88036f80ee00 index:0x0 compound_mapcount: 0
[  807.928831] flags: 0x5fff8000008100(slab|head)
[  807.933647] raw: 005fff8000008100 ffffea000db44f00 0000000400000004 ffff88036f80ee00
[  807.942050] raw: 0000000000000000 0000000080190019 00000001ffffffff 0000000000000000
[  807.950456] page dumped because: kasan: bad access detected

[  807.958240] Memory state around the buggy address:
[  807.963405]  ffff88033e635f00: fc fc fc fc fb fb fb fb fb fb fb fc fc fc fc fb
[  807.971288]  ffff88033e635f80: fb fb fb fb fb fb fc fc fc fc fc fc fc fc fc fc
[  807.979166] >ffff88033e636000: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[  807.994882]                    ^
[  807.998477]  ffff88033e636080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[  808.006352]  ffff88033e636100: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb
[  808.014230] ==================================================================
[  808.022108] Disabling lock debugging due to kernel taint

Fixes: edfaf94fa705 ("net_sched: improve and refactor tcf_action_put_many()")
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_api.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 316c98bb87e4..e12f8ef7baa4 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -1179,6 +1179,7 @@ static int tcf_action_delete(struct net *net, struct tc_action *actions[])
 		struct tcf_idrinfo *idrinfo = a->idrinfo;
 		u32 act_index = a->tcfa_index;
 
+		actions[i] = NULL;
 		if (tcf_action_put(a)) {
 			/* last reference, action was deleted concurrently */
 			module_put(ops->owner);
@@ -1190,7 +1191,6 @@ static int tcf_action_delete(struct net *net, struct tc_action *actions[])
 			if (ret < 0)
 				return ret;
 		}
-		actions[i] = NULL;
 	}
 	return 0;
 }
-- 
cgit v1.2.3-59-g8ed1b


From af8a2b8ba7678b4695f9e854ba9abae1076beabe Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 3 Sep 2018 15:47:10 +0800
Subject: sctp: fix invalid reference to the index variable of the iterator

Now in sctp_apply_peer_addr_params(), if SPP_IPV6_FLOWLABEL flag is set
and trans is NULL, it would use trans as the index variable to traverse
transport_addr_list, then trans is set as the last transport of it.

Later, if SPP_DSCP flag is set, it would enter into the wrong branch as
trans is actually an invalid reference.

So fix it by using a new index variable to traverse transport_addr_list
for both SPP_DSCP and SPP_IPV6_FLOWLABEL flags process.

Fixes: 0b0dce7a36fb ("sctp: add spp_ipv6_flowlabel and spp_dscp for sctp_paddrparams")
Reported-by: Julia Lawall <julia.lawall@lip6.fr>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index aa76586a1a1c..a0ccfa4b8220 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2663,14 +2663,15 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
 					   SCTP_FLOWLABEL_VAL_MASK;
 			trans->flowlabel |= SCTP_FLOWLABEL_SET_MASK;
 		} else if (asoc) {
-			list_for_each_entry(trans,
-					    &asoc->peer.transport_addr_list,
+			struct sctp_transport *t;
+
+			list_for_each_entry(t, &asoc->peer.transport_addr_list,
 					    transports) {
-				if (trans->ipaddr.sa.sa_family != AF_INET6)
+				if (t->ipaddr.sa.sa_family != AF_INET6)
 					continue;
-				trans->flowlabel = params->spp_ipv6_flowlabel &
-						   SCTP_FLOWLABEL_VAL_MASK;
-				trans->flowlabel |= SCTP_FLOWLABEL_SET_MASK;
+				t->flowlabel = params->spp_ipv6_flowlabel &
+					       SCTP_FLOWLABEL_VAL_MASK;
+				t->flowlabel |= SCTP_FLOWLABEL_SET_MASK;
 			}
 			asoc->flowlabel = params->spp_ipv6_flowlabel &
 					  SCTP_FLOWLABEL_VAL_MASK;
@@ -2687,12 +2688,13 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
 			trans->dscp = params->spp_dscp & SCTP_DSCP_VAL_MASK;
 			trans->dscp |= SCTP_DSCP_SET_MASK;
 		} else if (asoc) {
-			list_for_each_entry(trans,
-					    &asoc->peer.transport_addr_list,
+			struct sctp_transport *t;
+
+			list_for_each_entry(t, &asoc->peer.transport_addr_list,
 					    transports) {
-				trans->dscp = params->spp_dscp &
-					      SCTP_DSCP_VAL_MASK;
-				trans->dscp |= SCTP_DSCP_SET_MASK;
+				t->dscp = params->spp_dscp &
+					  SCTP_DSCP_VAL_MASK;
+				t->dscp |= SCTP_DSCP_SET_MASK;
 			}
 			asoc->dscp = params->spp_dscp & SCTP_DSCP_VAL_MASK;
 			asoc->dscp |= SCTP_DSCP_SET_MASK;
-- 
cgit v1.2.3-59-g8ed1b


From 741880e1f2f59b20125dc480765d2546cec66080 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 3 Sep 2018 15:47:11 +0800
Subject: sctp: not traverse asoc trans list if non-ipv6 trans exists for
 ipv6_flowlabel

When users set params.spp_address and get a trans, ipv6_flowlabel flag
should be applied into this trans. But even if this one is not an ipv6
trans, it should not go to apply it into all other transes of the asoc
but simply ignore it.

Fixes: 0b0dce7a36fb ("sctp: add spp_ipv6_flowlabel and spp_dscp for sctp_paddrparams")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index a0ccfa4b8220..f73e9d38d5ba 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2658,10 +2658,12 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
 	}
 
 	if (params->spp_flags & SPP_IPV6_FLOWLABEL) {
-		if (trans && trans->ipaddr.sa.sa_family == AF_INET6) {
-			trans->flowlabel = params->spp_ipv6_flowlabel &
-					   SCTP_FLOWLABEL_VAL_MASK;
-			trans->flowlabel |= SCTP_FLOWLABEL_SET_MASK;
+		if (trans) {
+			if (trans->ipaddr.sa.sa_family == AF_INET6) {
+				trans->flowlabel = params->spp_ipv6_flowlabel &
+						   SCTP_FLOWLABEL_VAL_MASK;
+				trans->flowlabel |= SCTP_FLOWLABEL_SET_MASK;
+			}
 		} else if (asoc) {
 			struct sctp_transport *t;
 
-- 
cgit v1.2.3-59-g8ed1b


From 9cc1bf3928b31e515ed15477b3c7eb653d0b3b42 Mon Sep 17 00:00:00 2001
From: Zhenbo Gao <zhenbo.gao@windriver.com>
Date: Mon, 3 Sep 2018 16:36:45 +0800
Subject: tipc: correct spelling errors for struct tipc_bc_base's comment

Trivial fix for two spelling mistakes.

Signed-off-by: Zhenbo Gao <zhenbo.gao@windriver.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/bcast.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 9ee6cfea56dd..d8026543bf4c 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -51,12 +51,12 @@ const char tipc_bclink_name[] = "broadcast-link";
  * struct tipc_bc_base - base structure for keeping broadcast send state
  * @link: broadcast send link structure
  * @inputq: data input queue; will only carry SOCK_WAKEUP messages
- * @dest: array keeping number of reachable destinations per bearer
+ * @dests: array keeping number of reachable destinations per bearer
  * @primary_bearer: a bearer having links to all broadcast destinations, if any
  * @bcast_support: indicates if primary bearer, if any, supports broadcast
  * @rcast_support: indicates if all peer nodes support replicast
  * @rc_ratio: dest count as percentage of cluster size where send method changes
- * @bc_threshold: calculated drom rc_ratio; if dests > threshold use broadcast
+ * @bc_threshold: calculated from rc_ratio; if dests > threshold use broadcast
  */
 struct tipc_bc_base {
 	struct tipc_link *link;
-- 
cgit v1.2.3-59-g8ed1b


From a484ef3442d2f05fa59edf4f6d14a8169d1b94a6 Mon Sep 17 00:00:00 2001
From: Zhenbo Gao <zhenbo.gao@windriver.com>
Date: Mon, 3 Sep 2018 16:36:46 +0800
Subject: tipc: correct spelling errors for tipc_topsrv_queue_evt() comments

tipc_conn_queue_evt -> tipc_topsrv_queue_evt
tipc_send_work -> tipc_conn_send_work
tipc_send_to_sock -> tipc_conn_send_to_sock

Signed-off-by: Zhenbo Gao <zhenbo.gao@windriver.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/topsrv.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c
index c8e34ef22c30..2627b5d812e9 100644
--- a/net/tipc/topsrv.c
+++ b/net/tipc/topsrv.c
@@ -313,8 +313,8 @@ static void tipc_conn_send_work(struct work_struct *work)
 	conn_put(con);
 }
 
-/* tipc_conn_queue_evt() - interrupt level call from a subscription instance
- * The queued work is launched into tipc_send_work()->tipc_send_to_sock()
+/* tipc_topsrv_queue_evt() - interrupt level call from a subscription instance
+ * The queued work is launched into tipc_conn_send_work()->tipc_conn_send_to_sock()
  */
 void tipc_topsrv_queue_evt(struct net *net, int conid,
 			   u32 event, struct tipc_event *evt)
-- 
cgit v1.2.3-59-g8ed1b


From 6d784f1625ea68783cc1fb17de8f6cd3e1660c3f Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Mon, 3 Sep 2018 11:08:15 -0700
Subject: act_ife: fix a potential use-after-free

Immediately after module_put(), user could delete this
module, so e->ops could be already freed before we call
e->ops->release().

Fix this by moving module_put() after ops->release().

Fixes: ef6980b6becb ("introduce IFE action")
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_ife.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 196430aefe87..fc412769a1be 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -400,7 +400,6 @@ static void _tcf_ife_cleanup(struct tc_action *a)
 	struct tcf_meta_info *e, *n;
 
 	list_for_each_entry_safe(e, n, &ife->metalist, metalist) {
-		module_put(e->ops->owner);
 		list_del(&e->metalist);
 		if (e->metaval) {
 			if (e->ops->release)
@@ -408,6 +407,7 @@ static void _tcf_ife_cleanup(struct tc_action *a)
 			else
 				kfree(e->metaval);
 		}
+		module_put(e->ops->owner);
 		kfree(e);
 	}
 }
-- 
cgit v1.2.3-59-g8ed1b


From 84cb8eb26cb9ce3c79928094962a475a9d850a53 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Tue, 4 Sep 2018 00:44:42 +0300
Subject: net: sched: action_ife: take reference to meta module

Recent refactoring of add_metainfo() caused use_all_metadata() to add
metainfo to ife action metalist without taking reference to module. This
causes warning in module_put called from ife action cleanup function.

Implement add_metainfo_and_get_ops() function that returns with reference
to module taken if metainfo was added successfully, and call it from
use_all_metadata(), instead of calling __add_metainfo() directly.

Example warning:

[  646.344393] WARNING: CPU: 1 PID: 2278 at kernel/module.c:1139 module_put+0x1cb/0x230
[  646.352437] Modules linked in: act_meta_skbtcindex act_meta_mark act_meta_skbprio act_ife ife veth nfsv3 nfs fscache xt_CHECKSUM iptable_mangle ipt_MASQUERADE iptable_nat nf_nat_ipv4 nf_nat xt_conntrack nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 libcrc32c tun ebtable_filter ebtables ip6table_filter ip6_tables bridge stp llc mlx5_ib ib_uverbs ib_core intel_rapl sb_edac x86_pkg_temp_thermal mlx5_core coretemp kvm_intel kvm nfsd igb irqbypass crct10dif_pclmul devlink crc32_pclmul mei_me joydev ses crc32c_intel enclosure auth_rpcgss i2c_algo_bit ioatdma ptp mei pps_core ghash_clmulni_intel iTCO_wdt iTCO_vendor_support pcspkr dca ipmi_ssif lpc_ich target_core_mod i2c_i801 ipmi_si ipmi_devintf pcc_cpufreq wmi ipmi_msghandler nfs_acl lockd acpi_pad acpi_power_meter grace sunrpc mpt3sas raid_class scsi_transport_sas
[  646.425631] CPU: 1 PID: 2278 Comm: tc Not tainted 4.19.0-rc1+ #799
[  646.432187] Hardware name: Supermicro SYS-2028TP-DECR/X10DRT-P, BIOS 2.0b 03/30/2017
[  646.440595] RIP: 0010:module_put+0x1cb/0x230
[  646.445238] Code: f3 66 94 02 e8 26 ff fa ff 85 c0 74 11 0f b6 1d 51 30 94 02 80 fb 01 77 60 83 e3 01 74 13 65 ff 0d 3a 83 db 73 e9 2b ff ff ff <0f> 0b e9 00 ff ff ff e8 59 01 fb ff 85 c0 75 e4 48 c7 c2 20 62 6b
[  646.464997] RSP: 0018:ffff880354d37068 EFLAGS: 00010286
[  646.470599] RAX: 0000000000000000 RBX: ffffffffc0a52518 RCX: ffffffff8c2668db
[  646.478118] RDX: 0000000000000003 RSI: dffffc0000000000 RDI: ffffffffc0a52518
[  646.485641] RBP: ffffffffc0a52180 R08: fffffbfff814a4a4 R09: fffffbfff814a4a3
[  646.493164] R10: ffffffffc0a5251b R11: fffffbfff814a4a4 R12: 1ffff1006a9a6e0d
[  646.500687] R13: 00000000ffffffff R14: ffff880362bab890 R15: dead000000000100
[  646.508213] FS:  00007f4164c99800(0000) GS:ffff88036fe40000(0000) knlGS:0000000000000000
[  646.516961] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  646.523080] CR2: 00007f41638b8420 CR3: 0000000351df0004 CR4: 00000000001606e0
[  646.530595] Call Trace:
[  646.533408]  ? find_symbol_in_section+0x260/0x260
[  646.538509]  tcf_ife_cleanup+0x11b/0x200 [act_ife]
[  646.543695]  tcf_action_cleanup+0x29/0xa0
[  646.548078]  __tcf_action_put+0x5a/0xb0
[  646.552289]  ? nla_put+0x65/0xe0
[  646.555889]  __tcf_idr_release+0x48/0x60
[  646.560187]  tcf_generic_walker+0x448/0x6b0
[  646.564764]  ? tcf_action_dump_1+0x450/0x450
[  646.569411]  ? __lock_is_held+0x84/0x110
[  646.573720]  ? tcf_ife_walker+0x10c/0x20f [act_ife]
[  646.578982]  tca_action_gd+0x972/0xc40
[  646.583129]  ? tca_get_fill.constprop.17+0x250/0x250
[  646.588471]  ? mark_lock+0xcf/0x980
[  646.592324]  ? check_chain_key+0x140/0x1f0
[  646.596832]  ? debug_show_all_locks+0x240/0x240
[  646.601839]  ? memset+0x1f/0x40
[  646.605350]  ? nla_parse+0xca/0x1a0
[  646.609217]  tc_ctl_action+0x215/0x230
[  646.613339]  ? tcf_action_add+0x220/0x220
[  646.617748]  rtnetlink_rcv_msg+0x56a/0x6d0
[  646.622227]  ? rtnl_fdb_del+0x3f0/0x3f0
[  646.626466]  netlink_rcv_skb+0x18d/0x200
[  646.630752]  ? rtnl_fdb_del+0x3f0/0x3f0
[  646.634959]  ? netlink_ack+0x500/0x500
[  646.639106]  netlink_unicast+0x2d0/0x370
[  646.643409]  ? netlink_attachskb+0x340/0x340
[  646.648050]  ? _copy_from_iter_full+0xe9/0x3e0
[  646.652870]  ? import_iovec+0x11e/0x1c0
[  646.657083]  netlink_sendmsg+0x3b9/0x6a0
[  646.661388]  ? netlink_unicast+0x370/0x370
[  646.665877]  ? netlink_unicast+0x370/0x370
[  646.670351]  sock_sendmsg+0x6b/0x80
[  646.674212]  ___sys_sendmsg+0x4a1/0x520
[  646.678443]  ? copy_msghdr_from_user+0x210/0x210
[  646.683463]  ? lock_downgrade+0x320/0x320
[  646.687849]  ? debug_show_all_locks+0x240/0x240
[  646.692760]  ? do_raw_spin_unlock+0xa2/0x130
[  646.697418]  ? _raw_spin_unlock+0x24/0x30
[  646.701798]  ? __handle_mm_fault+0x1819/0x1c10
[  646.706619]  ? __pmd_alloc+0x320/0x320
[  646.710738]  ? debug_show_all_locks+0x240/0x240
[  646.715649]  ? restore_nameidata+0x7b/0xa0
[  646.720117]  ? check_chain_key+0x140/0x1f0
[  646.724590]  ? check_chain_key+0x140/0x1f0
[  646.729070]  ? __fget_light+0xbc/0xd0
[  646.733121]  ? __sys_sendmsg+0xd7/0x150
[  646.737329]  __sys_sendmsg+0xd7/0x150
[  646.741359]  ? __ia32_sys_shutdown+0x30/0x30
[  646.746003]  ? up_read+0x53/0x90
[  646.749601]  ? __do_page_fault+0x484/0x780
[  646.754105]  ? do_syscall_64+0x1e/0x2c0
[  646.758320]  do_syscall_64+0x72/0x2c0
[  646.762353]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
[  646.767776] RIP: 0033:0x7f4163872150
[  646.771713] Code: 8b 15 3c 7d 2b 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb cd 66 0f 1f 44 00 00 83 3d b9 d5 2b 00 00 75 10 b8 2e 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 be cd 00 00 48 89 04 24
[  646.791474] RSP: 002b:00007ffdef7d6b58 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
[  646.799721] RAX: ffffffffffffffda RBX: 0000000000000024 RCX: 00007f4163872150
[  646.807240] RDX: 0000000000000000 RSI: 00007ffdef7d6bd0 RDI: 0000000000000003
[  646.814760] RBP: 000000005b8b9482 R08: 0000000000000001 R09: 0000000000000000
[  646.822286] R10: 00000000000005e7 R11: 0000000000000246 R12: 00007ffdef7dad20
[  646.829807] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000679bc0
[  646.837360] irq event stamp: 6083
[  646.841043] hardirqs last  enabled at (6081): [<ffffffff8c220a7d>] __call_rcu+0x17d/0x500
[  646.849882] hardirqs last disabled at (6083): [<ffffffff8c004f06>] trace_hardirqs_off_thunk+0x1a/0x1c
[  646.859775] softirqs last  enabled at (5968): [<ffffffff8d4004a1>] __do_softirq+0x4a1/0x6ee
[  646.868784] softirqs last disabled at (6082): [<ffffffffc0a78759>] tcf_ife_cleanup+0x39/0x200 [act_ife]
[  646.878845] ---[ end trace b1b8c12ffe51e657 ]---

Fixes: 5ffe57da29b3 ("act_ife: fix a potential deadlock")
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_ife.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index fc412769a1be..06a3d4801878 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -326,6 +326,20 @@ static int __add_metainfo(const struct tcf_meta_ops *ops,
 	return ret;
 }
 
+static int add_metainfo_and_get_ops(const struct tcf_meta_ops *ops,
+				    struct tcf_ife_info *ife, u32 metaid,
+				    bool exists)
+{
+	int ret;
+
+	if (!try_module_get(ops->owner))
+		return -ENOENT;
+	ret = __add_metainfo(ops, ife, metaid, NULL, 0, true, exists);
+	if (ret)
+		module_put(ops->owner);
+	return ret;
+}
+
 static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval,
 			int len, bool exists)
 {
@@ -349,7 +363,7 @@ static int use_all_metadata(struct tcf_ife_info *ife, bool exists)
 
 	read_lock(&ife_mod_lock);
 	list_for_each_entry(o, &ifeoplist, list) {
-		rc = __add_metainfo(o, ife, o->metaid, NULL, 0, true, exists);
+		rc = add_metainfo_and_get_ops(o, ife, o->metaid, exists);
 		if (rc == 0)
 			installed += 1;
 	}
-- 
cgit v1.2.3-59-g8ed1b