From e8d67fa5696e2fcaf956dae36d11e6eff5246101 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 30 May 2019 00:51:26 +0300 Subject: net: dsa: sja1105: Don't store frame type in skb->cb Due to a confusion, I thought that eth_type_trans() was called by the network stack, whereas it can actually be called by network drivers to figure out the skb protocol and the next packet_type handlers. In light of the above, it is not safe to store the frame type from the DSA tagger's .filter callback (the first entry point on the RX path), since GRO is yet to be invoked on the received traffic. Hence it is very likely that the skb->cb will actually get overwritten between eth_type_trans() and the actual DSA packet_type handler. Of course, what this patch fixes is the actual overwriting of the SJA1105_SKB_CB(skb)->type field from the GRO layer, which made all frames be seen as SJA1105_FRAME_TYPE_NORMAL (0). Fixes: 227d07a07ef1 ("net: dsa: sja1105: Add support for traffic through standalone ports") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/linux/dsa/sja1105.h | 12 ------------ net/dsa/tag_sja1105.c | 10 +++------- 2 files changed, 3 insertions(+), 19 deletions(-) diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h index 603a02e5a8cb..e46e18c47d41 100644 --- a/include/linux/dsa/sja1105.h +++ b/include/linux/dsa/sja1105.h @@ -20,18 +20,6 @@ #define SJA1105_LINKLOCAL_FILTER_B 0x011B19000000ull #define SJA1105_LINKLOCAL_FILTER_B_MASK 0xFFFFFF000000ull -enum sja1105_frame_type { - SJA1105_FRAME_TYPE_NORMAL = 0, - SJA1105_FRAME_TYPE_LINK_LOCAL, -}; - -struct sja1105_skb_cb { - enum sja1105_frame_type type; -}; - -#define SJA1105_SKB_CB(skb) \ - ((struct sja1105_skb_cb *)DSA_SKB_CB_PRIV(skb)) - struct sja1105_port { struct dsa_port *dp; int mgmt_slot; diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index 969402c7dbf1..d43737e6c3fb 100644 --- a/net/dsa/tag_sja1105.c +++ b/net/dsa/tag_sja1105.c @@ -28,14 +28,10 @@ static inline bool sja1105_is_link_local(const struct sk_buff *skb) */ static bool sja1105_filter(const struct sk_buff *skb, struct net_device *dev) { - if (sja1105_is_link_local(skb)) { - SJA1105_SKB_CB(skb)->type = SJA1105_FRAME_TYPE_LINK_LOCAL; + if (sja1105_is_link_local(skb)) return true; - } - if (!dsa_port_is_vlan_filtering(dev->dsa_ptr)) { - SJA1105_SKB_CB(skb)->type = SJA1105_FRAME_TYPE_NORMAL; + if (!dsa_port_is_vlan_filtering(dev->dsa_ptr)) return true; - } return false; } @@ -84,7 +80,7 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb, skb->offload_fwd_mark = 1; - if (SJA1105_SKB_CB(skb)->type == SJA1105_FRAME_TYPE_LINK_LOCAL) { + if (sja1105_is_link_local(skb)) { /* Management traffic path. Switch embeds the switch ID and * port ID into bytes of the destination MAC, courtesy of * the incl_srcpt options. -- cgit v1.2.3-59-g8ed1b From 62394708f3e01c9f2be6be74eb6305bae1ed924f Mon Sep 17 00:00:00 2001 From: Nikita Yushchenko Date: Fri, 31 May 2019 10:35:14 +0300 Subject: net: dsa: mv88e6xxx: avoid error message on remove from VLAN 0 When a non-bridged, non-VLAN mv88e6xxx port goes down, an error message is logged: failed to kill vid 0081/0 for device eth_cu_1000_4 This is caused by a call from __vlan_vid_del() with vid set to zero; over the call chain this results in _mv88e6xxx_port_vlan_del() being called with vid=0, and mv88e6xxx_vtu_get() called from there returns -EINVAL. On the symmetric path, when the port goes up, the call goes through mv88e6xxx_port_vlan_prepare(), which calls mv88e6xxx_port_check_hw_vlan(), which returns -EOPNOTSUPP for zero vid.
This patch changes mv88e6xxx_vtu_get() to also return -EOPNOTSUPP for zero vid; this error code is then explicitly cleared in dsa_slave_vlan_rx_kill_vid(), and the error message is no longer logged. Signed-off-by: Nikita Yushchenko Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 12f165a71a6c..d8edc4b59a22 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -1392,7 +1392,7 @@ static int mv88e6xxx_vtu_get(struct mv88e6xxx_chip *chip, u16 vid, int err; if (!vid) - return -EINVAL; + return -EOPNOTSUPP; entry->vid = vid - 1; entry->valid = false; -- cgit v1.2.3-59-g8ed1b From 2e1f164861e500f4e068a9d909bbd3fcc7841483 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Fri, 31 May 2019 16:59:50 +0800 Subject: net: hns: Fix loopback test failed at copper ports When doing a loopback test on copper ports, the serdes loopback and the phy loopback will fail, because the link adjustment has not finished and the phy is not ready. Add a sleep between the link adjustment and the test process to fix it. Signed-off-by: Yonglong Liu Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_ethtool.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c index ce15d2350db9..188c3f6791b5 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c @@ -339,6 +339,7 @@ static int __lb_setup(struct net_device *ndev, static int __lb_up(struct net_device *ndev, enum hnae_loop loop_mode) { +#define NIC_LB_TEST_WAIT_PHY_LINK_TIME 300 struct hns_nic_priv *priv = netdev_priv(ndev); struct hnae_handle *h = priv->ae_handle; int speed, duplex; @@ -365,6 +366,9 @@ static int __lb_up(struct net_device *ndev, h->dev->ops->adjust_link(h, speed, duplex); + /* wait adjust link done and phy ready */ + msleep(NIC_LB_TEST_WAIT_PHY_LINK_TIME); + return 0; } -- cgit v1.2.3-59-g8ed1b From 8c268598192df9a07ca967f6a8d5f12c3c1b3888 Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Fri, 31 May 2019 08:31:02 +0100 Subject: Update my email address Signed-off-by: Wei Liu Acked-by: Paul Durrant Signed-off-by: David S. Miller --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 429c6c624861..2c71e0611fd2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17293,7 +17293,7 @@ F: Documentation/ABI/stable/sysfs-hypervisor-xen F: Documentation/ABI/testing/sysfs-hypervisor-xen XEN NETWORK BACKEND DRIVER -M: Wei Liu +M: Wei Liu M: Paul Durrant L: xen-devel@lists.xenproject.org (moderated for non-subscribers) L: netdev@vger.kernel.org -- cgit v1.2.3-59-g8ed1b From afa0925c6fcc6a8f610e996ca09bc3215048033c Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 31 May 2019 12:37:23 -0400 Subject: packet: unconditionally free po->rollover Rollover used to use a complex RCU mechanism for assignment, which had a race condition. The below patch fixed the bug and greatly simplified the logic. The feature depends on fanout, but the state is private to the socket. fanout_release() returns f only when the last member leaves and the fanout struct is to be freed. Destroy rollover unconditionally, regardless of fanout state.
Fixes: 57f015f5eccf2 ("packet: fix crash in fanout_demux_rollover()") Reported-by: syzbot Diagnosed-by: Dmitry Vyukov Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/packet/af_packet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index fbc775fbf712..d4889bf7248e 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3014,8 +3014,8 @@ static int packet_release(struct socket *sock) synchronize_net(); + kfree(po->rollover); if (f) { - kfree(po->rollover); fanout_release_data(f); kfree(f); } -- cgit v1.2.3-59-g8ed1b From 09faf5a7d7c0bcb07faba072f611937af9dd5788 Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Fri, 31 May 2019 16:47:25 +0300 Subject: net: ethernet: ti: cpsw_ethtool: fix ethtool ring param set Fix the ability to set the RX descriptor number. The reason is that "tx_max_pending" was initially set incorrectly, but the issue only appears after the sanity check was added, so the fix is against the "sanity" patch. Fixes: 37e2d99b59c476 ("ethtool: Ensure new ring parameters are within bounds during SRINGPARAM") Signed-off-by: Ivan Khoronzhuk Reviewed-by: Grygorii Strashko Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/cpsw_ethtool.c b/drivers/net/ethernet/ti/cpsw_ethtool.c index a4a7ec0d2531..6d1c9ebae7cc 100644 --- a/drivers/net/ethernet/ti/cpsw_ethtool.c +++ b/drivers/net/ethernet/ti/cpsw_ethtool.c @@ -643,7 +643,7 @@ void cpsw_get_ringparam(struct net_device *ndev, struct cpsw_common *cpsw = priv->cpsw; /* not supported */ - ering->tx_max_pending = 0; + ering->tx_max_pending = cpsw->descs_pool_size - CPSW_MAX_QUEUES; ering->tx_pending = cpdma_get_num_tx_descs(cpsw->dma); ering->rx_max_pending = cpsw->descs_pool_size - CPSW_MAX_QUEUES; ering->rx_pending = cpdma_get_num_rx_descs(cpsw->dma); -- cgit v1.2.3-59-g8ed1b From 9e4f56f1a7f3287718d0083b5cb85298dc05a5fd Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Sat, 1 Jun 2019 08:16:26 +0800 Subject: net: ethernet: mediatek: Use hw_feature to judge if HWLRO is supported Use hw_features as the hardware capability flags to check whether hardware LRO is supported. Signed-off-by: Mark Lee Signed-off-by: Sean Wang Signed-off-by: David S.
Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index f9fbb3ffa3a6..0b88febbaf2a 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -2298,13 +2298,13 @@ static int mtk_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd, switch (cmd->cmd) { case ETHTOOL_GRXRINGS: - if (dev->features & NETIF_F_LRO) { + if (dev->hw_features & NETIF_F_LRO) { cmd->data = MTK_MAX_RX_RING_NUM; ret = 0; } break; case ETHTOOL_GRXCLSRLCNT: - if (dev->features & NETIF_F_LRO) { + if (dev->hw_features & NETIF_F_LRO) { struct mtk_mac *mac = netdev_priv(dev); cmd->rule_cnt = mac->hwlro_ip_cnt; @@ -2312,11 +2312,11 @@ static int mtk_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd, } break; case ETHTOOL_GRXCLSRULE: - if (dev->features & NETIF_F_LRO) + if (dev->hw_features & NETIF_F_LRO) ret = mtk_hwlro_get_fdir_entry(dev, cmd); break; case ETHTOOL_GRXCLSRLALL: - if (dev->features & NETIF_F_LRO) + if (dev->hw_features & NETIF_F_LRO) ret = mtk_hwlro_get_fdir_all(dev, cmd, rule_locs); break; @@ -2333,11 +2333,11 @@ static int mtk_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) switch (cmd->cmd) { case ETHTOOL_SRXCLSRLINS: - if (dev->features & NETIF_F_LRO) + if (dev->hw_features & NETIF_F_LRO) ret = mtk_hwlro_add_ipaddr(dev, cmd); break; case ETHTOOL_SRXCLSRLDEL: - if (dev->features & NETIF_F_LRO) + if (dev->hw_features & NETIF_F_LRO) ret = mtk_hwlro_del_ipaddr(dev, cmd); break; default: -- cgit v1.2.3-59-g8ed1b From 880c2d4b2fdfd580ebcd6bb7240a8027a1d34751 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Sat, 1 Jun 2019 08:16:27 +0800 Subject: net: ethernet: mediatek: Use NET_IP_ALIGN to judge if HW RX_2BYTE_OFFSET is enabled Should only enable HW RX_2BYTE_OFFSET function in the case NET_IP_ALIGN equals to 2. Signed-off-by: Mark Lee Signed-off-by: Sean Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 0b88febbaf2a..765cd56ebcd2 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1778,6 +1778,7 @@ static void mtk_poll_controller(struct net_device *dev) static int mtk_start_dma(struct mtk_eth *eth) { + u32 rx_2b_offset = (NET_IP_ALIGN == 2) ? MTK_RX_2B_OFFSET : 0; int err; err = mtk_dma_init(eth); @@ -1794,7 +1795,7 @@ static int mtk_start_dma(struct mtk_eth *eth) MTK_QDMA_GLO_CFG); mtk_w32(eth, - MTK_RX_DMA_EN | MTK_RX_2B_OFFSET | + MTK_RX_DMA_EN | rx_2b_offset | MTK_RX_BT_32DWORDS | MTK_MULTI_EN, MTK_PDMA_GLO_CFG); -- cgit v1.2.3-59-g8ed1b From 67c0aaa1eaec60e9dab301012bdebe6726ae04bd Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 2 Jun 2019 19:09:55 +0800 Subject: selftests: set sysctl bc_forwarding properly in router_broadcast.sh sysctl setting bc_forwarding for $rp2 is needed when ping_test_from h2, otherwise the bc packets from $rp2 won't be forwarded. This patch is to add this setting for $rp2. Also, as ping_test_from does grep "$from" only, which could match some unexpected output, some test case doesn't really work, like: # ping_test_from $h2 198.51.200.255 198.51.200.2 PING 198.51.200.255 from 198.51.100.2 veth3: 56(84) bytes of data. 
64 bytes from 198.51.100.1: icmp_seq=1 ttl=64 time=0.336 ms When doing grep $from (198.51.200.2), the output could still match. So change to grep "bytes from $from" instead. Fixes: 40f98b9af943 ("selftests: add a selftest for directed broadcast forwarding") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- tools/testing/selftests/net/forwarding/router_broadcast.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/forwarding/router_broadcast.sh b/tools/testing/selftests/net/forwarding/router_broadcast.sh index 9a678ece32b4..4eac0a06f451 100755 --- a/tools/testing/selftests/net/forwarding/router_broadcast.sh +++ b/tools/testing/selftests/net/forwarding/router_broadcast.sh @@ -145,16 +145,19 @@ bc_forwarding_disable() { sysctl_set net.ipv4.conf.all.bc_forwarding 0 sysctl_set net.ipv4.conf.$rp1.bc_forwarding 0 + sysctl_set net.ipv4.conf.$rp2.bc_forwarding 0 } bc_forwarding_enable() { sysctl_set net.ipv4.conf.all.bc_forwarding 1 sysctl_set net.ipv4.conf.$rp1.bc_forwarding 1 + sysctl_set net.ipv4.conf.$rp2.bc_forwarding 1 } bc_forwarding_restore() { + sysctl_restore net.ipv4.conf.$rp2.bc_forwarding sysctl_restore net.ipv4.conf.$rp1.bc_forwarding sysctl_restore net.ipv4.conf.all.bc_forwarding } @@ -171,7 +174,7 @@ ping_test_from() log_info "ping $dip, expected reply from $from" ip vrf exec $(master_name_get $oif) \ $PING -I $oif $dip -c 10 -i 0.1 -w $PING_TIMEOUT -b 2>&1 \ - | grep $from &> /dev/null + | grep "bytes from $from" > /dev/null check_err_fail $fail $? } -- cgit v1.2.3-59-g8ed1b From 28e74a7cfd6403f0d1c0f8b10b45d6fae37b227e Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 2 Jun 2019 15:13:00 +0100 Subject: net: sfp: read eeprom in maximum 16 byte increments Some SFP modules do not like reads longer than 16 bytes, so read the EEPROM in chunks of 16 bytes at a time. This behaviour is not specified in the SFP MSAs, which specify: "The serial interface uses the 2-wire serial CMOS E2PROM protocol defined for the ATMEL AT24C01A/02/04 family of components." and "As long as the SFP+ receives an acknowledge, it shall serially clock out sequential data words. The sequence is terminated when the host responds with a NACK and a STOP instead of an acknowledge." We must avoid breaking a read across a 16-bit quantity in the diagnostic page; thankfully, all 16-bit quantities in that page are naturally aligned. Signed-off-by: Russell King Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/sfp.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index d4635c2178d1..71812be0ac64 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -281,6 +281,7 @@ static int sfp_i2c_read(struct sfp *sfp, bool a2, u8 dev_addr, void *buf, { struct i2c_msg msgs[2]; u8 bus_addr = a2 ? 0x51 : 0x50; + size_t this_len; int ret; msgs[0].addr = bus_addr; @@ -292,11 +293,26 @@ static int sfp_i2c_read(struct sfp *sfp, bool a2, u8 dev_addr, void *buf, msgs[1].len = len; msgs[1].buf = buf; - ret = i2c_transfer(sfp->i2c, msgs, ARRAY_SIZE(msgs)); - if (ret < 0) - return ret; + while (len) { + this_len = len; + if (this_len > 16) + this_len = 16; - return ret == ARRAY_SIZE(msgs) ?
len : 0; + msgs[1].len = this_len; + + ret = i2c_transfer(sfp->i2c, msgs, ARRAY_SIZE(msgs)); + if (ret < 0) + return ret; + + if (ret != ARRAY_SIZE(msgs)) + break; + + msgs[1].buf += this_len; + dev_addr += this_len; + len -= this_len; + } + + return msgs[1].buf - (u8 *)buf; } static int sfp_i2c_write(struct sfp *sfp, bool a2, u8 dev_addr, void *buf, -- cgit v1.2.3-59-g8ed1b From 77316763321ee4050f0576ffd472183aa90dcb30 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 2 Jun 2019 15:12:54 +0100 Subject: net: phylink: avoid reducing support mask Avoid reducing the support mask as a result of the interface type selected for SFP modules, or when setting the link settings through ethtool - this should only change when the supported link modes of the hardware combination change. Signed-off-by: Russell King Signed-off-by: David S. Miller --- drivers/net/phy/phylink.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 9044b95d2afe..4c0616ba314d 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -1073,6 +1073,7 @@ EXPORT_SYMBOL_GPL(phylink_ethtool_ksettings_get); int phylink_ethtool_ksettings_set(struct phylink *pl, const struct ethtool_link_ksettings *kset) { + __ETHTOOL_DECLARE_LINK_MODE_MASK(support); struct ethtool_link_ksettings our_kset; struct phylink_link_state config; int ret; @@ -1083,11 +1084,12 @@ int phylink_ethtool_ksettings_set(struct phylink *pl, kset->base.autoneg != AUTONEG_ENABLE) return -EINVAL; + linkmode_copy(support, pl->supported); config = pl->link_config; /* Mask out unsupported advertisements */ linkmode_and(config.advertising, kset->link_modes.advertising, - pl->supported); + support); /* FIXME: should we reject autoneg if phy/mac does not support it? */ if (kset->base.autoneg == AUTONEG_DISABLE) { @@ -1097,7 +1099,7 @@ int phylink_ethtool_ksettings_set(struct phylink *pl, * duplex. 
*/ s = phy_lookup_setting(kset->base.speed, kset->base.duplex, - pl->supported, false); + support, false); if (!s) return -EINVAL; @@ -1126,7 +1128,7 @@ int phylink_ethtool_ksettings_set(struct phylink *pl, __set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, config.advertising); } - if (phylink_validate(pl, pl->supported, &config)) + if (phylink_validate(pl, support, &config)) return -EINVAL; /* If autonegotiation is enabled, we must have an advertisement */ @@ -1576,6 +1578,7 @@ static int phylink_sfp_module_insert(void *upstream, { struct phylink *pl = upstream; __ETHTOOL_DECLARE_LINK_MODE_MASK(support) = { 0, }; + __ETHTOOL_DECLARE_LINK_MODE_MASK(support1); struct phylink_link_state config; phy_interface_t iface; int ret = 0; @@ -1603,6 +1606,8 @@ static int phylink_sfp_module_insert(void *upstream, return ret; } + linkmode_copy(support1, support); + iface = sfp_select_interface(pl->sfp_bus, id, config.advertising); if (iface == PHY_INTERFACE_MODE_NA) { netdev_err(pl->netdev, @@ -1612,7 +1617,7 @@ static int phylink_sfp_module_insert(void *upstream, } config.interface = iface; - ret = phylink_validate(pl, support, &config); + ret = phylink_validate(pl, support1, &config); if (ret) { netdev_err(pl->netdev, "validation of %s/%s with support %*pb failed: %d\n", phylink_an_mode_str(MLO_AN_INBAND), -- cgit v1.2.3-59-g8ed1b From f4cfcfbdf03cf7cf5f9097803415dfdcf965676c Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 3 Jun 2019 02:31:37 +0300 Subject: net: dsa: sja1105: Fix link speed not working at 100 Mbps and below The hardware values for link speed are held in the sja1105_speed_t enum. However they do not increase in the order that sja1105_get_speed_cfg was iterating over them (basically from SJA1105_SPEED_AUTO - 0 - to SJA1105_SPEED_1000MBPS - 1 - skipping the other two). Another bug is that the code in sja1105_adjust_port_config relies on the fact that an invalid link speed is detected by sja1105_get_speed_cfg and returned as -EINVAL. However storing this into an enum that only has positive members will cast it into an unsigned value, and it will miss the negative check. So take the simplest approach and remove the sja1105_get_speed_cfg function and replace it with a simple switch-case statement. Fixes: 8aa9ebccae87 ("net: dsa: Introduce driver for NXP SJA1105 5-port L2 switch") Signed-off-by: Vladimir Oltean Suggested-by: Andrew Lunn Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105_main.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 0663b78a2f6c..1c3959efebc4 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -652,16 +652,6 @@ static int sja1105_speed[] = { [SJA1105_SPEED_1000MBPS] = 1000, }; -static sja1105_speed_t sja1105_get_speed_cfg(unsigned int speed_mbps) -{ - int i; - - for (i = SJA1105_SPEED_AUTO; i <= SJA1105_SPEED_1000MBPS; i++) - if (sja1105_speed[i] == speed_mbps) - return i; - return -EINVAL; -} - /* Set link speed and enable/disable traffic I/O in the MAC configuration * for a specific port. 
* @@ -684,8 +674,21 @@ static int sja1105_adjust_port_config(struct sja1105_private *priv, int port, mii = priv->static_config.tables[BLK_IDX_XMII_PARAMS].entries; mac = priv->static_config.tables[BLK_IDX_MAC_CONFIG].entries; - speed = sja1105_get_speed_cfg(speed_mbps); - if (speed_mbps && speed < 0) { + switch (speed_mbps) { + case 0: + /* No speed update requested */ + speed = SJA1105_SPEED_AUTO; + break; + case 10: + speed = SJA1105_SPEED_10MBPS; + break; + case 100: + speed = SJA1105_SPEED_100MBPS; + break; + case 1000: + speed = SJA1105_SPEED_1000MBPS; + break; + default: dev_err(dev, "Invalid speed %iMbps\n", speed_mbps); return -EINVAL; } @@ -695,10 +698,7 @@ static int sja1105_adjust_port_config(struct sja1105_private *priv, int port, * and we no longer need to store it in the static config (already told * hardware we want auto during upload phase). */ - if (speed_mbps) - mac[port].speed = speed; - else - mac[port].speed = SJA1105_SPEED_AUTO; + mac[port].speed = speed; /* On P/Q/R/S, one can read from the device via the MAC reconfiguration * tables. On E/T, MAC reconfig tables are not readable, only writable. -- cgit v1.2.3-59-g8ed1b From 27393f8c6efc03b8e0b64134721b0d337fca0a80 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 4 Jun 2019 12:00:11 -0700 Subject: Revert "net/tls: avoid NULL-deref on resync during device removal" This reverts commit 38030d7cb77963ba84cdbe034806e2b81245339f. Unfortunately the RX resync may get called from soft IRQ, so we can't take the rwsem to protect from the device disappearing. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/tls/tls_device.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index b95c408fd771..49b3a2ff8ef3 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -553,8 +553,8 @@ void tls_device_write_space(struct sock *sk, struct tls_context *ctx) void handle_device_resync(struct sock *sk, u32 seq, u64 rcd_sn) { struct tls_context *tls_ctx = tls_get_ctx(sk); + struct net_device *netdev = tls_ctx->netdev; struct tls_offload_context_rx *rx_ctx; - struct net_device *netdev; u32 is_req_pending; s64 resync_req; u32 req_seq; @@ -568,15 +568,10 @@ void handle_device_resync(struct sock *sk, u32 seq, u64 rcd_sn) is_req_pending = resync_req; if (unlikely(is_req_pending) && req_seq == seq && - atomic64_try_cmpxchg(&rx_ctx->resync_req, &resync_req, 0)) { - seq += TLS_HEADER_SIZE - 1; - down_read(&device_offload_lock); - netdev = tls_ctx->netdev; - if (netdev) - netdev->tlsdev_ops->tls_dev_resync_rx(netdev, sk, seq, - rcd_sn); - up_read(&device_offload_lock); - } + atomic64_try_cmpxchg(&rx_ctx->resync_req, &resync_req, 0)) + netdev->tlsdev_ops->tls_dev_resync_rx(netdev, sk, + seq + TLS_HEADER_SIZE - 1, + rcd_sn); } static int tls_device_reencrypt(struct sock *sk, struct sk_buff *skb) -- cgit v1.2.3-59-g8ed1b From e52972c11d6b1262964db96d65934196db621685 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 4 Jun 2019 12:00:12 -0700 Subject: net/tls: replace the sleeping lock around RX resync with a bit lock Commit 38030d7cb779 ("net/tls: avoid NULL-deref on resync during device removal") tried to fix a potential NULL-dereference by taking the context rwsem. Unfortunately the RX resync may get called from soft IRQ, so we can't use the rwsem to protect from the device disappearing. 
Because we are guaranteed there can be only one resync at a time (it's called from strparser) use a bit to indicate resync is busy and make device removal wait for the bit to get cleared. Note that there is a leftover "flags" field in struct tls_context already. Fixes: 4799ac81e52a ("tls: Add rx inline crypto offload") Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/net/tls.h | 4 ++++ net/tls/tls_device.c | 27 +++++++++++++++++++++------ 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/include/net/tls.h b/include/net/tls.h index 39ea62f0c1f6..4a55ce6a303f 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -209,6 +209,10 @@ struct tls_offload_context_tx { (ALIGN(sizeof(struct tls_offload_context_tx), sizeof(void *)) + \ TLS_DRIVER_STATE_SIZE) +enum tls_context_flags { + TLS_RX_SYNC_RUNNING = 0, +}; + struct cipher_context { char *iv; char *rec_seq; diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 49b3a2ff8ef3..1f9cf57d9754 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -550,10 +550,22 @@ void tls_device_write_space(struct sock *sk, struct tls_context *ctx) } } +static void tls_device_resync_rx(struct tls_context *tls_ctx, + struct sock *sk, u32 seq, u64 rcd_sn) +{ + struct net_device *netdev; + + if (WARN_ON(test_and_set_bit(TLS_RX_SYNC_RUNNING, &tls_ctx->flags))) + return; + netdev = READ_ONCE(tls_ctx->netdev); + if (netdev) + netdev->tlsdev_ops->tls_dev_resync_rx(netdev, sk, seq, rcd_sn); + clear_bit_unlock(TLS_RX_SYNC_RUNNING, &tls_ctx->flags); +} + void handle_device_resync(struct sock *sk, u32 seq, u64 rcd_sn) { struct tls_context *tls_ctx = tls_get_ctx(sk); - struct net_device *netdev = tls_ctx->netdev; struct tls_offload_context_rx *rx_ctx; u32 is_req_pending; s64 resync_req; @@ -568,10 +580,10 @@ void handle_device_resync(struct sock *sk, u32 seq, u64 rcd_sn) is_req_pending = resync_req; if (unlikely(is_req_pending) && req_seq == seq && - atomic64_try_cmpxchg(&rx_ctx->resync_req, &resync_req, 0)) - netdev->tlsdev_ops->tls_dev_resync_rx(netdev, sk, - seq + TLS_HEADER_SIZE - 1, - rcd_sn); + atomic64_try_cmpxchg(&rx_ctx->resync_req, &resync_req, 0)) { + seq += TLS_HEADER_SIZE - 1; + tls_device_resync_rx(tls_ctx, sk, seq, rcd_sn); + } } static int tls_device_reencrypt(struct sock *sk, struct sk_buff *skb) @@ -972,7 +984,10 @@ static int tls_device_down(struct net_device *netdev) if (ctx->rx_conf == TLS_HW) netdev->tlsdev_ops->tls_dev_del(netdev, ctx, TLS_OFFLOAD_CTX_DIR_RX); - ctx->netdev = NULL; + WRITE_ONCE(ctx->netdev, NULL); + smp_mb__before_atomic(); /* pairs with test_and_set_bit() */ + while (test_bit(TLS_RX_SYNC_RUNNING, &ctx->flags)) + usleep_range(10, 200); dev_put(netdev); list_del_init(&ctx->list); -- cgit v1.2.3-59-g8ed1b From 82ba25c6de200d7a9e9c970c998cdd6dfa8637ae Mon Sep 17 00:00:00 2001 From: Tim Beale Date: Tue, 4 Jun 2019 13:56:23 +1200 Subject: udp: only choose unbound UDP socket for multicast when not in a VRF By default, packets received in another VRF should not be passed to an unbound socket in the default VRF. This patch updates the IPv4 UDP multicast logic to match the unicast VRF logic (in compute_score()), as well as the IPv6 mcast logic (in __udp_v6_is_mcast_sock()). The particular case I noticed was DHCP discover packets going to the 255.255.255.255 address, which are handled by __udp4_lib_mcast_deliver(). 
The previous code meant that running multiple different DHCP server or relay agent instances across VRFs did not work correctly - any server/relay agent in the default VRF received DHCP discover packets for all other VRFs. Fixes: 6da5b0f027a8 ("net: ensure unbound datagram socket to be chosen when not in a VRF") Signed-off-by: Tim Beale Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/udp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 8fb250ed53d4..efe9283a8a95 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -538,8 +538,7 @@ static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk, (inet->inet_dport != rmt_port && inet->inet_dport) || (inet->inet_rcv_saddr && inet->inet_rcv_saddr != loc_addr) || ipv6_only_sock(sk) || - (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif && - sk->sk_bound_dev_if != sdif)) + !udp_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif)) return false; if (!ip_mc_sf_allow(sk, loc_addr, rmt_addr, dif, sdif)) return false; -- cgit v1.2.3-59-g8ed1b From ceae266bf0ae6564ac16d086bf749a096fa90ded Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Tue, 4 Jun 2019 06:07:34 +0000 Subject: net: ipvlan: Fix ipvlan device tso disabled while NETIF_F_IP_CSUM is set There's some NICs, such as hinic, with NETIF_F_IP_CSUM and NETIF_F_TSO on but NETIF_F_HW_CSUM off. And ipvlan device features will be NETIF_F_TSO on with NETIF_F_IP_CSUM and NETIF_F_IP_CSUM both off as IPVLAN_FEATURES only care about NETIF_F_HW_CSUM. So TSO will be disabled in netdev_fix_features. For example: Features for enp129s0f0: rx-checksumming: on tx-checksumming: on tx-checksum-ipv4: on tx-checksum-ip-generic: off [fixed] tx-checksum-ipv6: on Fixes: a188222b6ed2 ("net: Rename NETIF_F_ALL_CSUM to NETIF_F_CSUM_MASK") Signed-off-by: Miaohe Lin Signed-off-by: David S. Miller --- drivers/net/ipvlan/ipvlan_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index bbeb1623e2d5..717fce6edeb7 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -112,7 +112,7 @@ static void ipvlan_port_destroy(struct net_device *dev) } #define IPVLAN_FEATURES \ - (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \ + (NETIF_F_SG | NETIF_F_CSUM_MASK | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \ NETIF_F_GSO | NETIF_F_TSO | NETIF_F_GSO_ROBUST | \ NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GRO | NETIF_F_RXCSUM | \ NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER) -- cgit v1.2.3-59-g8ed1b From fdf71426e7c548d6e4f5e51e0238732d60e05f5f Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 4 Jun 2019 11:44:06 +0200 Subject: net: fix indirect calls helpers for ptype list hooks. As Eric noted, the current wrapper for ptype func hook inside __netif_receive_skb_list_ptype() has no chance of avoiding the indirect call: we enter such code path only for protocols other than ipv4 and ipv6. Instead we can wrap the list_func invocation. v1 -> v2: - use the correct fix tag Fixes: f5737cbadb7d ("net: use indirect calls helpers for ptype hook") Suggested-by: Eric Dumazet Signed-off-by: Paolo Abeni Acked-by: Edward Cree Reviewed-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/core/dev.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 66f7508825bd..1c4593ec4409 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5025,12 +5025,12 @@ static inline void __netif_receive_skb_list_ptype(struct list_head *head, if (list_empty(head)) return; if (pt_prev->list_func != NULL) - pt_prev->list_func(head, pt_prev, orig_dev); + INDIRECT_CALL_INET(pt_prev->list_func, ipv6_list_rcv, + ip_list_rcv, head, pt_prev, orig_dev); else list_for_each_entry_safe(skb, next, head, list) { skb_list_del_init(skb); - INDIRECT_CALL_INET(pt_prev->func, ipv6_rcv, ip_rcv, skb, - skb->dev, pt_prev, orig_dev); + pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } } -- cgit v1.2.3-59-g8ed1b From 72c87976c5abbf8a834ad85f10d03c0cd58b985c Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 5 Jun 2019 13:48:48 +0200 Subject: s390/qeth: handle limited IPv4 broadcast in L3 TX path When selecting the cast type of a neighbourless IPv4 skb (eg. on a raw socket), qeth_l3 falls back to the packet's destination IP address. For this case we should classify traffic sent to 255.255.255.255 as broadcast. This fixes DHCP requests, which were misclassified as unicast (and for IQD interfaces thus ended up on the wrong HW queue). Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l3_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 0271833da6a2..2df67abdfde7 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -1911,6 +1911,8 @@ static int qeth_l3_get_cast_type(struct sk_buff *skb) /* no neighbour (eg AF_PACKET), fall back to target's IP address ... */ switch (qeth_get_ip_version(skb)) { case 4: + if (ipv4_is_lbcast(ip_hdr(skb)->daddr)) + return RTN_BROADCAST; return ipv4_is_multicast(ip_hdr(skb)->daddr) ? RTN_MULTICAST : RTN_UNICAST; case 6: -- cgit v1.2.3-59-g8ed1b From 0cd6783d3c7d40be165d1f3c811cedf0e3dfcdf1 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 5 Jun 2019 13:48:49 +0200 Subject: s390/qeth: check dst entry before use While qeth_l3 uses netif_keep_dst() to hold onto the dst, a skb's dst may still have been obsoleted (via dst_dev_put()) by the time that we end up using it. The dst then points to the loopback interface, which means the neighbour lookup in qeth_l3_get_cast_type() determines a bogus cast type of RTN_BROADCAST. For IQD interfaces this causes us to place such skbs on the wrong HW queue, resulting in TX errors. Fix-up the various call sites to first validate the dst entry with dst_check(), and fall back accordingly. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l3_main.c | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 2df67abdfde7..13bf3e2e9cea 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -1888,13 +1888,20 @@ static int qeth_l3_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) static int qeth_l3_get_cast_type(struct sk_buff *skb) { + int ipv = qeth_get_ip_version(skb); struct neighbour *n = NULL; struct dst_entry *dst; rcu_read_lock(); dst = skb_dst(skb); - if (dst) - n = dst_neigh_lookup_skb(dst, skb); + if (dst) { + struct rt6_info *rt = (struct rt6_info *) dst; + + dst = dst_check(dst, (ipv == 6) ? 
rt6_get_cookie(rt) : 0); + if (dst) + n = dst_neigh_lookup_skb(dst, skb); + } + if (n) { int cast_type = n->type; @@ -1909,7 +1916,7 @@ static int qeth_l3_get_cast_type(struct sk_buff *skb) rcu_read_unlock(); /* no neighbour (eg AF_PACKET), fall back to target's IP address ... */ - switch (qeth_get_ip_version(skb)) { + switch (ipv) { case 4: if (ipv4_is_lbcast(ip_hdr(skb)->daddr)) return RTN_BROADCAST; @@ -1942,6 +1949,7 @@ static void qeth_l3_fill_header(struct qeth_qdio_out_q *queue, struct qeth_hdr_layer3 *l3_hdr = &hdr->hdr.l3; struct vlan_ethhdr *veth = vlan_eth_hdr(skb); struct qeth_card *card = queue->card; + struct dst_entry *dst; hdr->hdr.l3.length = data_len; @@ -1987,15 +1995,27 @@ static void qeth_l3_fill_header(struct qeth_qdio_out_q *queue, } rcu_read_lock(); + dst = skb_dst(skb); + if (ipv == 4) { - struct rtable *rt = skb_rtable(skb); + struct rtable *rt; + + if (dst) + dst = dst_check(dst, 0); + rt = (struct rtable *) dst; *((__be32 *) &hdr->hdr.l3.next_hop.ipv4.addr) = (rt) ? rt_nexthop(rt, ip_hdr(skb)->daddr) : ip_hdr(skb)->daddr; } else { /* IPv6 */ - const struct rt6_info *rt = skb_rt6_info(skb); + struct rt6_info *rt; + + if (dst) { + rt = (struct rt6_info *) dst; + dst = dst_check(dst, rt6_get_cookie(rt)); + } + rt = (struct rt6_info *) dst; if (rt && !ipv6_addr_any(&rt->rt6i_gateway)) l3_hdr->next_hop.ipv6_addr = rt->rt6i_gateway; -- cgit v1.2.3-59-g8ed1b From 335726195e460cb6b3f795b695bfd31f0ea70ef0 Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Wed, 5 Jun 2019 13:48:50 +0200 Subject: s390/qeth: fix VLAN attribute in bridge_hostnotify udev event Enabling sysfs attribute bridge_hostnotify triggers a series of udev events for the MAC addresses of all currently connected peers. In case no VLAN is set for a peer, the device reports the corresponding MAC addresses with VLAN ID 4096. This currently results in attribute VLAN=4096 for all non-VLAN interfaces in the initial series of events after host-notify is enabled. Instead, no VLAN attribute should be reported in the udev event for non-VLAN interfaces. Only the initial events face this issue. For dynamic changes that are reported later, the device uses a validity flag. This also changes the code so that it now sets the VLAN attribute for MAC addresses with VID 0. On Linux, no qeth interface will ever be registered with VID 0: Linux kernel registers VID 0 on all network interfaces initially, but qeth will drop .ndo_vlan_rx_add_vid for VID 0. Peers with other OSs could register MACs with VID 0. Fixes: 9f48b9db9a22 ("qeth: bridgeport support - address notifications") Signed-off-by: Alexandra Winter Signed-off-by: Julian Wiedmann Signed-off-by: David S. 
Miller --- drivers/s390/net/qeth_l2_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 218801232ca2..ff8a6cd790b1 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -1680,7 +1680,7 @@ static void qeth_bridgeport_an_set_cb(void *priv, l2entry = (struct qdio_brinfo_entry_l2 *)entry; code = IPA_ADDR_CHANGE_CODE_MACADDR; - if (l2entry->addr_lnid.lnid) + if (l2entry->addr_lnid.lnid < VLAN_N_VID) code |= IPA_ADDR_CHANGE_CODE_VLANID; qeth_bridge_emit_host_event(card, anev_reg_unreg, code, (struct net_if_token *)&l2entry->nit, -- cgit v1.2.3-59-g8ed1b From bd966839bdf9165ee0aefa57132e87e9caf55982 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 5 Jun 2019 13:48:51 +0200 Subject: s390/qeth: handle error when updating TX queue count netif_set_real_num_tx_queues() can return an error, deal with it. Fixes: 73dc2daf110f ("s390/qeth: add TX multiqueue support for OSA devices") Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 009f2c0ec504..b1823d75dd35 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -1274,16 +1274,20 @@ static int qeth_setup_channel(struct qeth_channel *channel, bool alloc_buffers) return 0; } -static void qeth_osa_set_output_queues(struct qeth_card *card, bool single) +static int qeth_osa_set_output_queues(struct qeth_card *card, bool single) { unsigned int count = single ? 1 : card->dev->num_tx_queues; + int rc; rtnl_lock(); - netif_set_real_num_tx_queues(card->dev, count); + rc = netif_set_real_num_tx_queues(card->dev, count); rtnl_unlock(); + if (rc) + return rc; + if (card->qdio.no_out_queues == count) - return; + return 0; if (atomic_read(&card->qdio.state) != QETH_QDIO_UNINITIALIZED) qeth_free_qdio_queues(card); @@ -1293,12 +1297,14 @@ static void qeth_osa_set_output_queues(struct qeth_card *card, bool single) card->qdio.default_out_queue = single ? 
0 : QETH_DEFAULT_QUEUE; card->qdio.no_out_queues = count; + return 0; } static int qeth_update_from_chp_desc(struct qeth_card *card) { struct ccw_device *ccwdev; struct channel_path_desc_fmt0 *chp_dsc; + int rc = 0; QETH_DBF_TEXT(SETUP, 2, "chp_desc"); @@ -1311,12 +1317,12 @@ static int qeth_update_from_chp_desc(struct qeth_card *card) if (IS_OSD(card) || IS_OSX(card)) /* CHPP field bit 6 == 1 -> single queue */ - qeth_osa_set_output_queues(card, chp_dsc->chpp & 0x02); + rc = qeth_osa_set_output_queues(card, chp_dsc->chpp & 0x02); kfree(chp_dsc); QETH_DBF_TEXT_(SETUP, 2, "nr:%x", card->qdio.no_out_queues); QETH_DBF_TEXT_(SETUP, 2, "lvl:%02x", card->info.func_level); - return 0; + return rc; } static void qeth_init_qdio_info(struct qeth_card *card) @@ -5597,8 +5603,12 @@ static struct net_device *qeth_alloc_netdev(struct qeth_card *card) dev->hw_features |= NETIF_F_SG; dev->vlan_features |= NETIF_F_SG; if (IS_IQD(card)) { - netif_set_real_num_tx_queues(dev, QETH_IQD_MIN_TXQ); dev->features |= NETIF_F_SG; + if (netif_set_real_num_tx_queues(dev, + QETH_IQD_MIN_TXQ)) { + free_netdev(dev); + return NULL; + } } } -- cgit v1.2.3-59-g8ed1b From 0a90478b93a46bdcd56ba33c37566a993e455d54 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 2 Jun 2019 19:10:24 +0800 Subject: ipv4: not do cache for local delivery if bc_forwarding is enabled With the topo: h1 ---| rp1 | | route rp3 |--- h3 (192.168.200.1) h2 ---| rp2 | If rp1 bc_forwarding is set while rp2 bc_forwarding is not, after doing "ping 192.168.200.255" on h1, then ping 192.168.200.255 on h2, and the packets can still be forwared. This issue was caused by the input route cache. It should only do the cache for either bc forwarding or local delivery. Otherwise, local delivery can use the route cache for bc forwarding of other interfaces. This patch is to fix it by not doing cache for local delivery if all.bc_forwarding is enabled. Note that we don't fix it by checking route cache local flag after rt_cache_valid() in "local_input:" and "ip_mkroute_input", as the common route code shouldn't be touched for bc_forwarding. Fixes: 5cbf777cfdf6 ("route: add support for directed broadcast forwarding") Reported-by: Jianlin Shi Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/ipv4/route.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 11ddc276776e..91bf75b8ac1c 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1985,7 +1985,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, u32 itag = 0; struct rtable *rth; struct flowi4 fl4; - bool do_cache; + bool do_cache = true; /* IP on this device is disabled. 
*/ @@ -2062,6 +2062,9 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (res->type == RTN_BROADCAST) { if (IN_DEV_BFORWARD(in_dev)) goto make_route; + /* not do cache if bc_forwarding is enabled */ + if (IPV4_DEVCONF_ALL(net, BC_FORWARDING)) + do_cache = false; goto brd_input; } @@ -2099,18 +2102,15 @@ brd_input: RT_CACHE_STAT_INC(in_brd); local_input: - do_cache = false; - if (res->fi) { - if (!itag) { - struct fib_nh_common *nhc = FIB_RES_NHC(*res); + do_cache &= res->fi && !itag; + if (do_cache) { + struct fib_nh_common *nhc = FIB_RES_NHC(*res); - rth = rcu_dereference(nhc->nhc_rth_input); - if (rt_cache_valid(rth)) { - skb_dst_set_noref(skb, &rth->dst); - err = 0; - goto out; - } - do_cache = true; + rth = rcu_dereference(nhc->nhc_rth_input); + if (rt_cache_valid(rth)) { + skb_dst_set_noref(skb, &rth->dst); + err = 0; + goto out; } } -- cgit v1.2.3-59-g8ed1b From b7999b07726c16974ba9ca3bb9fe98ecbec5f81c Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 2 Jun 2019 19:10:46 +0800 Subject: ipv6: fix the check before getting the cookie in rt6_get_cookie In Jianlin's testing, netperf was broken with 'Connection reset by peer', as the cookie check failed in rt6_check() and ip6_dst_check() always returned NULL. It's caused by Commit 93531c674315 ("net/ipv6: separate handling of FIB entries from dst based routes"), where the cookie can be got only when 'c1'(see below) for setting dst_cookie whereas rt6_check() is called when !'c1' for checking dst_cookie, as we can see in ip6_dst_check(). Since in ip6_dst_check() both rt6_dst_from_check() (c1) and rt6_check() (!c1) will check the 'from' cookie, this patch is to remove the c1 check in rt6_get_cookie(), so that the dst_cookie can always be set properly. c1: (rt->rt6i_flags & RTF_PCPU || unlikely(!list_empty(&rt->rt6i_uncached))) Fixes: 93531c674315 ("net/ipv6: separate handling of FIB entries from dst based routes") Reported-by: Jianlin Shi Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 525f701653ca..d6d936cbf6b3 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -263,8 +263,7 @@ static inline u32 rt6_get_cookie(const struct rt6_info *rt) rcu_read_lock(); from = rcu_dereference(rt->from); - if (from && (rt->rt6i_flags & RTF_PCPU || - unlikely(!list_empty(&rt->rt6i_uncached)))) + if (from) fib6_get_cookie_safe(from, &cookie); rcu_read_unlock(); -- cgit v1.2.3-59-g8ed1b From b50e058746ba29f517e27299447831ab3d93f896 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Mon, 3 Jun 2019 08:48:19 -0400 Subject: net: rds: fix memory leak when unload rds_rdma MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When KASAN is enabled, after several rds connections are created, then "rmmod rds_rdma" is run. The following will appear. " BUG rds_ib_incoming (Not tainted): Objects remaining in rds_ib_incoming on __kmem_cache_shutdown() Call Trace: dump_stack+0x71/0xab slab_err+0xad/0xd0 __kmem_cache_shutdown+0x17d/0x370 shutdown_cache+0x17/0x130 kmem_cache_destroy+0x1df/0x210 rds_ib_recv_exit+0x11/0x20 [rds_rdma] rds_ib_exit+0x7a/0x90 [rds_rdma] __x64_sys_delete_module+0x224/0x2c0 ? __ia32_sys_delete_module+0x2c0/0x2c0 do_syscall_64+0x73/0x190 entry_SYSCALL_64_after_hwframe+0x44/0xa9 " This is rds connection memory leak. 
The root cause is: When "rmmod rds_rdma" is run, rds_ib_remove_one will call rds_ib_dev_shutdown to drop the rds connections. rds_ib_dev_shutdown will call rds_conn_drop to drop rds connections as below. " rds_conn_path_drop(&conn->c_path[0], false); " In the above, destroy is set to false. void rds_conn_path_drop(struct rds_conn_path *cp, bool destroy) { atomic_set(&cp->cp_state, RDS_CONN_ERROR); rcu_read_lock(); if (!destroy && rds_destroy_pending(cp->cp_conn)) { rcu_read_unlock(); return; } queue_work(rds_wq, &cp->cp_down_w); rcu_read_unlock(); } In the above function, destroy is set to false. rds_destroy_pending is called. This does not move rds connections to ib_nodev_conns. So destroy is set to true to move rds connections to ib_nodev_conns. In rds_ib_unregister_client, flush_workqueue is called to make rds_wq finsh shutdown rds connections. The function rds_ib_destroy_nodev_conns is called to shutdown rds connections finally. Then rds_ib_recv_exit is called to destroy slab. void rds_ib_recv_exit(void) { kmem_cache_destroy(rds_ib_incoming_slab); kmem_cache_destroy(rds_ib_frag_slab); } The above slab memory leak will not occur again. >From tests, 256 rds connections [root@ca-dev14 ~]# time rmmod rds_rdma real 0m16.522s user 0m0.000s sys 0m8.152s 512 rds connections [root@ca-dev14 ~]# time rmmod rds_rdma real 0m32.054s user 0m0.000s sys 0m15.568s To rmmod rds_rdma with 256 rds connections, about 16 seconds are needed. And with 512 rds connections, about 32 seconds are needed. >From ftrace, when one rds connection is destroyed, " 19) | rds_conn_destroy [rds]() { 19) 7.782 us | rds_conn_path_drop [rds](); 15) | rds_shutdown_worker [rds]() { 15) | rds_conn_shutdown [rds]() { 15) 1.651 us | rds_send_path_reset [rds](); 15) 7.195 us | } 15) + 11.434 us | } 19) 2.285 us | rds_cong_remove_conn [rds](); 19) * 24062.76 us | } " So if many rds connections will be destroyed, this function rds_ib_destroy_nodev_conns uses most of time. Suggested-by: Håkon Bugge Signed-off-by: Zhu Yanjun Signed-off-by: David S. 
Miller --- net/rds/ib.c | 2 +- net/rds/ib_recv.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/net/rds/ib.c b/net/rds/ib.c index 2da9b75bad16..b8d581b779b2 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -87,7 +87,7 @@ static void rds_ib_dev_shutdown(struct rds_ib_device *rds_ibdev) spin_lock_irqsave(&rds_ibdev->spinlock, flags); list_for_each_entry(ic, &rds_ibdev->conn_list, ib_node) - rds_conn_drop(ic->conn); + rds_conn_path_drop(&ic->conn->c_path[0], true); spin_unlock_irqrestore(&rds_ibdev->spinlock, flags); } diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 8946c89d7392..3cae88cbdaa0 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -168,6 +168,7 @@ void rds_ib_recv_free_caches(struct rds_ib_connection *ic) list_del(&inc->ii_cache_entry); WARN_ON(!list_empty(&inc->ii_frags)); kmem_cache_free(rds_ib_incoming_slab, inc); + atomic_dec(&rds_ib_allocation); } rds_ib_cache_xfer_to_ready(&ic->i_cache_frags); @@ -1057,6 +1058,8 @@ out: void rds_ib_recv_exit(void) { + WARN_ON(atomic_read(&rds_ib_allocation)); + kmem_cache_destroy(rds_ib_incoming_slab); kmem_cache_destroy(rds_ib_frag_slab); } -- cgit v1.2.3-59-g8ed1b From 0a8dd9f67cd0da7dc284f48b032ce00db1a68791 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 3 Jun 2019 16:32:59 -0400 Subject: Fix memory leak in sctp_process_init syzbot found the following leak in sctp_process_init BUG: memory leak unreferenced object 0xffff88810ef68400 (size 1024): comm "syz-executor273", pid 7046, jiffies 4294945598 (age 28.770s) hex dump (first 32 bytes): 1d de 28 8d de 0b 1b e3 b5 c2 f9 68 fd 1a 97 25 ..(........h...% 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000a02cebbd>] kmemleak_alloc_recursive include/linux/kmemleak.h:55 [inline] [<00000000a02cebbd>] slab_post_alloc_hook mm/slab.h:439 [inline] [<00000000a02cebbd>] slab_alloc mm/slab.c:3326 [inline] [<00000000a02cebbd>] __do_kmalloc mm/slab.c:3658 [inline] [<00000000a02cebbd>] __kmalloc_track_caller+0x15d/0x2c0 mm/slab.c:3675 [<000000009e6245e6>] kmemdup+0x27/0x60 mm/util.c:119 [<00000000dfdc5d2d>] kmemdup include/linux/string.h:432 [inline] [<00000000dfdc5d2d>] sctp_process_init+0xa7e/0xc20 net/sctp/sm_make_chunk.c:2437 [<00000000b58b62f8>] sctp_cmd_process_init net/sctp/sm_sideeffect.c:682 [inline] [<00000000b58b62f8>] sctp_cmd_interpreter net/sctp/sm_sideeffect.c:1384 [inline] [<00000000b58b62f8>] sctp_side_effects net/sctp/sm_sideeffect.c:1194 [inline] [<00000000b58b62f8>] sctp_do_sm+0xbdc/0x1d60 net/sctp/sm_sideeffect.c:1165 [<0000000044e11f96>] sctp_assoc_bh_rcv+0x13c/0x200 net/sctp/associola.c:1074 [<00000000ec43804d>] sctp_inq_push+0x7f/0xb0 net/sctp/inqueue.c:95 [<00000000726aa954>] sctp_backlog_rcv+0x5e/0x2a0 net/sctp/input.c:354 [<00000000d9e249a8>] sk_backlog_rcv include/net/sock.h:950 [inline] [<00000000d9e249a8>] __release_sock+0xab/0x110 net/core/sock.c:2418 [<00000000acae44fa>] release_sock+0x37/0xd0 net/core/sock.c:2934 [<00000000963cc9ae>] sctp_sendmsg+0x2c0/0x990 net/sctp/socket.c:2122 [<00000000a7fc7565>] inet_sendmsg+0x64/0x120 net/ipv4/af_inet.c:802 [<00000000b732cbd3>] sock_sendmsg_nosec net/socket.c:652 [inline] [<00000000b732cbd3>] sock_sendmsg+0x54/0x70 net/socket.c:671 [<00000000274c57ab>] ___sys_sendmsg+0x393/0x3c0 net/socket.c:2292 [<000000008252aedb>] __sys_sendmsg+0x80/0xf0 net/socket.c:2330 [<00000000f7bf23d1>] __do_sys_sendmsg net/socket.c:2339 [inline] [<00000000f7bf23d1>] __se_sys_sendmsg net/socket.c:2337 [inline] [<00000000f7bf23d1>] __x64_sys_sendmsg+0x23/0x30 net/socket.c:2337 
[<00000000a8b4131f>] do_syscall_64+0x76/0x1a0 arch/x86/entry/common.c:3 The problem was that the peer.cookie value points to an skb allocated area on the first pass through this function, at which point it is overwritten with a heap allocated value, but in certain cases, where a COOKIE_ECHO chunk is included in the packet, a second pass through sctp_process_init is made, where the cookie value is re-allocated, leaking the first allocation. Fix is to always allocate the cookie value, and free it when we are done using it. Signed-off-by: Neil Horman Reported-by: syzbot+f7e9153b037eac9b1df8@syzkaller.appspotmail.com CC: Marcelo Ricardo Leitner CC: "David S. Miller" CC: netdev@vger.kernel.org Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/sm_make_chunk.c | 13 +++---------- net/sctp/sm_sideeffect.c | 5 +++++ 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 92331e1195c1..f17908f5c4f3 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2312,7 +2312,6 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk, union sctp_addr addr; struct sctp_af *af; int src_match = 0; - char *cookie; /* We must include the address that the INIT packet came from. * This is the only address that matters for an INIT packet. @@ -2416,14 +2415,6 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk, /* Peer Rwnd : Current calculated value of the peer's rwnd. */ asoc->peer.rwnd = asoc->peer.i.a_rwnd; - /* Copy cookie in case we need to resend COOKIE-ECHO. */ - cookie = asoc->peer.cookie; - if (cookie) { - asoc->peer.cookie = kmemdup(cookie, asoc->peer.cookie_len, gfp); - if (!asoc->peer.cookie) - goto clean_up; - } - /* RFC 2960 7.2.1 The initial value of ssthresh MAY be arbitrarily * high (for example, implementations MAY use the size of the receiver * advertised window). @@ -2592,7 +2583,9 @@ do_addr_param: case SCTP_PARAM_STATE_COOKIE: asoc->peer.cookie_len = ntohs(param.p->length) - sizeof(struct sctp_paramhdr); - asoc->peer.cookie = param.cookie->body; + asoc->peer.cookie = kmemdup(param.cookie->body, asoc->peer.cookie_len, gfp); + if (!asoc->peer.cookie) + retval = 0; break; case SCTP_PARAM_HEARTBEAT_INFO: diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 9b50da548db2..a554d6d15d1b 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -883,6 +883,11 @@ static void sctp_cmd_new_state(struct sctp_cmd_seq *cmds, asoc->rto_initial; } + if (sctp_state(asoc, ESTABLISHED)) { + kfree(asoc->peer.cookie); + asoc->peer.cookie = NULL; + } + if (sctp_state(asoc, ESTABLISHED) || sctp_state(asoc, CLOSED) || sctp_state(asoc, SHUTDOWN_RECEIVED)) { -- cgit v1.2.3-59-g8ed1b From 0ee4e76937d69128a6a66861ba393ebdc2ffc8a2 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Mon, 3 Jun 2019 16:57:13 -0400 Subject: ethtool: fix potential userspace buffer overflow ethtool_get_regs() allocates a buffer of size ops->get_regs_len(), and pass it to the kernel driver via ops->get_regs() for filling. There is no restriction about what the kernel drivers can or cannot do with the open ethtool_regs structure. They usually set regs->version and ignore regs->len or set it to the same size as ops->get_regs_len(). But if userspace allocates a smaller buffer for the registers dump, we would cause a userspace buffer overflow in the final copy_to_user() call, which uses the regs.len value potentially reset by the driver. 
To fix this, make this case obvious and store regs.len before calling ops->get_regs(), to only copy as much data as requested by userspace, up to the value returned by ops->get_regs_len(). While at it, remove the redundant check for non-null regbuf. Signed-off-by: Vivien Didelot Reviewed-by: Michal Kubecek Signed-off-by: David S. Miller --- net/core/ethtool.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 43e9add58340..1a0196fbb49c 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1359,13 +1359,16 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) if (!regbuf) return -ENOMEM; + if (regs.len < reglen) + reglen = regs.len; + ops->get_regs(dev, &regs, regbuf); ret = -EFAULT; if (copy_to_user(useraddr, &regs, sizeof(regs))) goto out; useraddr += offsetof(struct ethtool_regs, data); - if (regbuf && copy_to_user(useraddr, regbuf, regs.len)) + if (copy_to_user(useraddr, regbuf, reglen)) goto out; ret = 0; -- cgit v1.2.3-59-g8ed1b From 930b9a0543385d4eb8ef887e88cf84d95a844577 Mon Sep 17 00:00:00 2001 From: Nikita Danilov Date: Tue, 4 Jun 2019 13:23:49 +0000 Subject: net: aquantia: fix wol configuration not applied sometimes WoL magic packet configuration sometimes does not work because of a couple of issues that were found. Mainly, there was a regression introduced during the readx_poll refactoring. Next, the FW request wait time was too short; sometimes that caused the sleep proxy config function to return with an error and skip the WoL configuration. Finally, the WoL data were passed to the FW from a buffer that was not cleared, which could cause the FW to accept garbage as random configuration data. Fixes: 6a7f2277313b ("net: aquantia: replace AQ_HW_WAIT_FOR with readx_poll_timeout_atomic") Signed-off-by: Nikita Danilov Signed-off-by: Igor Russkikh Signed-off-by: David S.
Miller --- .../net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c | 14 +++++++------- .../ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c | 4 +++- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c index 1208f7ecdd76..3fc41da39a0a 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c @@ -335,13 +335,13 @@ static int hw_atl_utils_fw_upload_dwords(struct aq_hw_s *self, u32 a, u32 *p, { u32 val; int err = 0; - bool is_locked; - is_locked = hw_atl_sem_ram_get(self); - if (!is_locked) { - err = -ETIME; + err = readx_poll_timeout_atomic(hw_atl_sem_ram_get, self, + val, val == 1U, + 10U, 100000U); + if (err < 0) goto err_exit; - } + if (IS_CHIP_FEATURE(REVISION_B1)) { u32 offset = 0; @@ -353,8 +353,8 @@ static int hw_atl_utils_fw_upload_dwords(struct aq_hw_s *self, u32 a, u32 *p, /* 1000 times by 10us = 10ms */ err = readx_poll_timeout_atomic(hw_atl_scrpad12_get, self, val, - (val & 0xF0000000) == - 0x80000000, + (val & 0xF0000000) != + 0x80000000, 10U, 10000U); } } else { diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c index fbc9d6ac841f..9c16d85fb104 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c @@ -384,7 +384,7 @@ static int aq_fw2x_set_sleep_proxy(struct aq_hw_s *self, u8 *mac) err = readx_poll_timeout_atomic(aq_fw2x_state2_get, self, val, val & HW_ATL_FW2X_CTRL_SLEEP_PROXY, - 1U, 10000U); + 1U, 100000U); err_exit: return err; @@ -404,6 +404,8 @@ static int aq_fw2x_set_wol_params(struct aq_hw_s *self, u8 *mac) msg = (struct fw2x_msg_wol *)rpc; + memset(msg, 0, sizeof(*msg)); + msg->msg_id = HAL_ATLANTIC_UTILS_FW2X_MSG_WOL; msg->magic_packet_enabled = true; memcpy(msg->hw_addr, mac, ETH_ALEN); -- cgit v1.2.3-59-g8ed1b From 4970b42d5c362bf873982db7d93245c5281e58f4 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 5 Jun 2019 12:27:14 +0800 Subject: Revert "fib_rules: return 0 directly if an exactly same rule exists when NLM_F_EXCL not supplied" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit e9919a24d3022f72bcadc407e73a6ef17093a849. Nathan reported the new behaviour breaks Android, as Android just add new rules and delete old ones. If we return 0 without adding dup rules, Android will remove the new added rules and causing system to soft-reboot. Fixes: e9919a24d302 ("fib_rules: return 0 directly if an exactly same rule exists when NLM_F_EXCL not supplied") Reported-by: Nathan Chancellor Reported-by: Yaro Slav Reported-by: Maciej Żenczykowski Signed-off-by: Hangbin Liu Reviewed-by: Nathan Chancellor Tested-by: Nathan Chancellor Signed-off-by: David S. 
From 4970b42d5c362bf873982db7d93245c5281e58f4 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 5 Jun 2019 12:27:14 +0800 Subject: Revert "fib_rules: return 0 directly if an exactly same rule exists when NLM_F_EXCL not supplied" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit e9919a24d3022f72bcadc407e73a6ef17093a849. Nathan reported that the new behaviour breaks Android, as Android just adds new rules and deletes old ones. If we return 0 without adding the duplicate rule, Android will remove the newly added rule, causing the system to soft-reboot. Fixes: e9919a24d302 ("fib_rules: return 0 directly if an exactly same rule exists when NLM_F_EXCL not supplied") Reported-by: Nathan Chancellor Reported-by: Yaro Slav Reported-by: Maciej Żenczykowski Signed-off-by: Hangbin Liu Reviewed-by: Nathan Chancellor Tested-by: Nathan Chancellor Signed-off-by: David S. Miller --- net/core/fib_rules.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 43f0115cce9c..18f8dd8329ed 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -757,9 +757,9 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, if (err) goto errout; - if (rule_exists(ops, frh, tb, rule)) { - if (nlh->nlmsg_flags & NLM_F_EXCL) - err = -EEXIST; + if ((nlh->nlmsg_flags & NLM_F_EXCL) && + rule_exists(ops, frh, tb, rule)) { + err = -EEXIST; goto errout_free; } -- cgit v1.2.3-59-g8ed1b From 59e3e4b52663a9d97efbce7307f62e4bc5c9ce91 Mon Sep 17 00:00:00 2001 From: Olivier Matz Date: Thu, 6 Jun 2019 09:15:18 +0200 Subject: ipv6: use READ_ONCE() for inet->hdrincl as in ipv4 As was done in commit 8f659a03a0ba ("net: ipv4: fix for a race condition in raw_sendmsg") and commit 20b50d79974e ("net: ipv4: emulate READ_ONCE() on ->hdrincl bit-field in raw_sendmsg()") for ipv4, copy the value of inet->hdrincl into a local variable, to avoid introducing a race condition in the next commit. Signed-off-by: Olivier Matz Signed-off-by: David S. Miller --- net/ipv6/raw.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 96a3559f2a09..af2f9a833c4e 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -783,6 +783,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) struct flowi6 fl6; struct ipcm6_cookie ipc6; int addr_len = msg->msg_namelen; + int hdrincl; u16 proto; int err; @@ -796,6 +797,13 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (msg->msg_flags & MSG_OOB) return -EOPNOTSUPP; + /* hdrincl should be READ_ONCE(inet->hdrincl) + * but READ_ONCE() doesn't work with bit fields. + * Doing this indirectly yields the same result. + */ + hdrincl = inet->hdrincl; + hdrincl = READ_ONCE(hdrincl); + /* * Get and verify the address. */ @@ -908,7 +916,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl6.flowi6_oif = np->ucast_oif; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - if (inet->hdrincl) + if (hdrincl) fl6.flowi6_flags |= FLOWI_FLAG_KNOWN_NH; if (ipc6.tclass < 0) @@ -931,7 +939,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) goto do_confirm; back_from_confirm: - if (inet->hdrincl) + if (hdrincl) err = rawv6_send_hdrinc(sk, msg, len, &fl6, &dst, msg->msg_flags, &ipc6.sockc); else { -- cgit v1.2.3-59-g8ed1b
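A short, self-contained illustration of why the commit above copies the bit-field instead of using READ_ONCE() on it directly; struct sock_opts and send_one() are made-up stand-ins, not kernel structures.

#include <linux/compiler.h>

struct sock_opts {                      /* hypothetical stand-in for inet_sock */
        unsigned int hdrincl:1;
        unsigned int other_flag:1;
};

static int send_one(struct sock_opts *opts)
{
        int hdrincl;

        /* READ_ONCE(opts->hdrincl) does not build: READ_ONCE() takes the
         * address of its argument, and the address of a bit-field cannot
         * be taken. Copying the bit into a local and only ever testing
         * the local keeps the sender's view of the flag stable even if a
         * concurrent setsockopt() flips it mid-call.
         */
        hdrincl = opts->hdrincl;
        hdrincl = READ_ONCE(hdrincl);

        return hdrincl ? 1 : 0;         /* e.g. pick the raw-header path */
}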
From b9aa52c4cb457e7416cc0c95f475e72ef4a61336 Mon Sep 17 00:00:00 2001 From: Olivier Matz Date: Thu, 6 Jun 2019 09:15:19 +0200 Subject: ipv6: fix EFAULT on sendto with icmpv6 and hdrincl The following code returns EFAULT (Bad address): s = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6); setsockopt(s, SOL_IPV6, IPV6_HDRINCL, 1); sendto(ipv6_icmp6_packet, addr); /* returns -1, errno = EFAULT */ The IPv4 equivalent code works. A workaround is to use IPPROTO_RAW instead of IPPROTO_ICMPV6. The failure happens because 2 bytes are eaten from the msghdr by rawv6_probe_proto_opt() starting from commit 19e3c66b52ca ("ipv6 equivalent of "ipv4: Avoid reading user iov twice after raw_probe_proto_opt""), but at that time it was not a problem because IPV6_HDRINCL was not yet introduced. Only eat these 2 bytes if hdrincl == 0. Fixes: 715f504b1189 ("ipv6: add IPV6_HDRINCL option for raw sockets") Signed-off-by: Olivier Matz Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv6/raw.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index af2f9a833c4e..1bb88b4b677b 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -895,11 +895,14 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) opt = ipv6_fixup_options(&opt_space, opt); fl6.flowi6_proto = proto; - rfv.msg = msg; - rfv.hlen = 0; - err = rawv6_probe_proto_opt(&rfv, &fl6); - if (err) - goto out; + + if (!hdrincl) { + rfv.msg = msg; + rfv.hlen = 0; + err = rawv6_probe_proto_opt(&rfv, &fl6); + if (err) + goto out; + } if (!ipv6_addr_any(daddr)) fl6.daddr = *daddr; -- cgit v1.2.3-59-g8ed1b From 85cb928787eab6a2f4ca9d2a798b6f3bed53ced1 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Thu, 6 Jun 2019 04:00:03 -0400 Subject: net: rds: fix memory leak in rds_ib_flush_mr_pool When the following tests run for several hours, the problem occurs. Server: rds-stress -r 1.1.1.16 -D 1M Client: rds-stress -r 1.1.1.14 -s 1.1.1.16 -D 1M -T 30 The following will occur. " Starting up.... tsks tx/s rx/s tx+rx K/s mbi K/s mbo K/s tx us/c rtt us cpu % 1 0 0 0.00 0.00 0.00 0.00 0.00 -1.00 1 0 0 0.00 0.00 0.00 0.00 0.00 -1.00 1 0 0 0.00 0.00 0.00 0.00 0.00 -1.00 1 0 0 0.00 0.00 0.00 0.00 0.00 -1.00 " From vmcore, we can find that clean_list is NULL. From the source code, rds_mr_flushd calls rds_ib_mr_pool_flush_worker. Then rds_ib_mr_pool_flush_worker calls " rds_ib_flush_mr_pool(pool, 0, NULL); " Then in function " int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all, struct rds_ib_mr **ibmr_ret) " ibmr_ret is NULL. In the source code, " ... list_to_llist_nodes(pool, &unmap_list, &clean_nodes, &clean_tail); if (ibmr_ret) *ibmr_ret = llist_entry(clean_nodes, struct rds_ib_mr, llnode); /* more than one entry in llist nodes */ if (clean_nodes->next) llist_add_batch(clean_nodes->next, clean_tail, &pool->clean_list); ... " When ibmr_ret is NULL, llist_entry is not executed. clean_nodes->next instead of clean_nodes is added to clean_list, so clean_nodes is discarded. It cannot be used again. The workqueue is executed periodically, so more and more clean_nodes are discarded. Finally the clean_list is NULL and the problem occurs. Fixes: 1bc144b62524 ("net, rds, Replace xlist in net/rds/xlist.h with llist") Signed-off-by: Zhu Yanjun Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/ib_rdma.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index d664e9ade74d..0b347f46b2f4 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -428,12 +428,14 @@ int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, wait_clean_list_grace(); list_to_llist_nodes(pool, &unmap_list, &clean_nodes, &clean_tail); - if (ibmr_ret) + if (ibmr_ret) { *ibmr_ret = llist_entry(clean_nodes, struct rds_ib_mr, llnode); - + clean_nodes = clean_nodes->next; + } /* more than one entry in llist nodes */ - if (clean_nodes->next) - llist_add_batch(clean_nodes->next, clean_tail, &pool->clean_list); + if (clean_nodes) + llist_add_batch(clean_nodes, clean_tail, + &pool->clean_list); } -- cgit v1.2.3-59-g8ed1b
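To make the llist bookkeeping in the rds fix above easier to see, here is a reduced sketch of the corrected pattern; struct my_mr and recycle_clean() are hypothetical, only llist_entry() and llist_add_batch() are the real kernel API.

#include <linux/llist.h>

struct my_mr {                          /* hypothetical wrapper around an MR */
        struct llist_node llnode;
};

/* Hand the first clean entry to the caller (if requested), then put the
 * remainder of the chain back on the pool. Either way every node ends up
 * owned by someone - nothing is silently dropped as in the buggy version.
 */
static void recycle_clean(struct llist_head *pool,
                          struct llist_node *first, struct llist_node *last,
                          struct my_mr **out)
{
        if (out) {
                *out = llist_entry(first, struct my_mr, llnode);
                first = first->next;    /* the caller now owns the old head */
        }
        if (first)
                llist_add_batch(first, last, pool);
}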
From d37acd5aa99c57505b64913e0e2624ec3daed8c5 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Thu, 6 Jun 2019 10:42:56 +0200 Subject: net: mvpp2: Use strscpy to handle stat strings Use a safe strscpy call to copy the ethtool stat strings into the relevant buffers, instead of a memcpy that accesses out-of-bounds data. Fixes: 118d6298f6f0 ("net: mvpp2: add ethtool GOP statistics") Signed-off-by: Maxime Chevallier Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 7a67e23a2c2b..d8e5241097a9 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -1304,8 +1304,8 @@ static void mvpp2_ethtool_get_strings(struct net_device *netdev, u32 sset, int i; for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_regs); i++) - memcpy(data + i * ETH_GSTRING_LEN, - &mvpp2_ethtool_regs[i].string, ETH_GSTRING_LEN); + strscpy(data + i * ETH_GSTRING_LEN, + mvpp2_ethtool_regs[i].string, ETH_GSTRING_LEN); } } -- cgit v1.2.3-59-g8ed1b
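A reduced sketch of the copy pattern the mvpp2 commit above switches to; struct my_stat and my_get_strings() are invented for the example, and the exact layout of the real mvpp2 descriptors may differ.

#include <linux/ethtool.h>
#include <linux/string.h>

struct my_stat {                        /* hypothetical stat descriptor */
        int offset;
        const char *name;               /* may be shorter than ETH_GSTRING_LEN */
};

static void my_get_strings(const struct my_stat *stats, int n, char *data)
{
        int i;

        /* strscpy() stops at the source's NUL terminator, so a short name
         * no longer makes us read ETH_GSTRING_LEN bytes from a smaller
         * source buffer the way a fixed-size memcpy() would.
         */
        for (i = 0; i < n; i++)
                strscpy(data + i * ETH_GSTRING_LEN, stats[i].name,
                        ETH_GSTRING_LEN);
}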
From 720f1de4021f09898b8c8443f3b3e995991b6e3a Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 6 Jun 2019 15:45:03 +0200 Subject: pktgen: do not sleep with the thread lock held. Currently, the process issuing a "start" command on the pktgen procfs interface acquires the pktgen thread lock and never releases it until all pktgen threads have completed. The above can block indefinitely any other pktgen command and any (even unrelated) netdevice removal - as the pktgen netdev notifier acquires the same lock. The issue is demonstrated by the following script, reported by Matteo: ip -b - <<'EOF' link add type dummy link add type veth link set dummy0 up EOF modprobe pktgen echo reset >/proc/net/pktgen/pgctrl { echo rem_device_all echo add_device dummy0 } >/proc/net/pktgen/kpktgend_0 echo count 0 >/proc/net/pktgen/dummy0 echo start >/proc/net/pktgen/pgctrl & sleep 1 rmmod veth Fix the above by releasing the thread lock around the sleep call. Additionally, we must prevent racing with a forceful rmmod, as the thread lock no longer protects against it. Instead, acquire a self-reference before waiting for any thread. As a side effect, running rmmod pktgen while some thread is running now fails with a "module in use" error; before this patch such a command hung indefinitely. Note: the issue predates the commit reported in the fixes tag, but this fix can't be applied before the mentioned commit. v1 -> v2: - no need to check for thread existence after flipping the lock, pktgen threads are freed only at net exit time - Fixes: 6146e6a43b35 ("[PKTGEN]: Removes thread_{un,}lock() macros.") Reported-and-tested-by: Matteo Croce Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/core/pktgen.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 319ad5490fb3..a33d03c05ed6 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3066,7 +3066,13 @@ static int pktgen_wait_thread_run(struct pktgen_thread *t) { while (thread_is_running(t)) { + /* note: 't' will still be around even after the unlock/lock + * cycle because pktgen_thread threads are only cleared at + * net exit + */ + mutex_unlock(&pktgen_thread_lock); msleep_interruptible(100); + mutex_lock(&pktgen_thread_lock); if (signal_pending(current)) goto signal; @@ -3081,6 +3087,10 @@ static int pktgen_wait_all_threads_run(struct pktgen_net *pn) struct pktgen_thread *t; int sig = 1; + /* prevent from racing with rmmod */ + if (!try_module_get(THIS_MODULE)) + return sig; + mutex_lock(&pktgen_thread_lock); list_for_each_entry(t, &pn->pktgen_threads, th_list) { @@ -3094,6 +3104,7 @@ static int pktgen_wait_all_threads_run(struct pktgen_net *pn) t->control |= (T_STOP); mutex_unlock(&pktgen_thread_lock); + module_put(THIS_MODULE); return sig; } -- cgit v1.2.3-59-g8ed1b
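As a closing illustration of the locking pattern in the pktgen patch above - with hypothetical names (my_thread_lock, my_wait_for_threads), not pktgen code: take a module reference before waiting so rmmod cannot pull the code out from under the sleeper, and drop the mutex around the sleep so other lock users keep making progress.

#include <linux/atomic.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/sched/signal.h>

static DEFINE_MUTEX(my_thread_lock);    /* hypothetical stand-in lock */

static int my_wait_for_threads(atomic_t *nr_running)
{
        if (!try_module_get(THIS_MODULE))       /* block a concurrent rmmod */
                return -ENODEV;

        mutex_lock(&my_thread_lock);
        while (atomic_read(nr_running)) {
                /* Sleep without the lock so notifiers and other commands
                 * are not blocked for the whole wait.
                 */
                mutex_unlock(&my_thread_lock);
                msleep_interruptible(100);
                mutex_lock(&my_thread_lock);

                if (signal_pending(current))
                        break;
        }
        mutex_unlock(&my_thread_lock);
        module_put(THIS_MODULE);
        return 0;
}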